Diffstat (limited to 'mm/memcontrol.c')
 -rw-r--r--  mm/memcontrol.c | 465
 1 file changed, 214 insertions, 251 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36896f3eb7f5..866dcc7eeb0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,11 +32,12 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
 		enum mem_cgroup_stat_index idx, int val)
 {
-	int cpu = smp_processor_id();
-	stat->cpustat[cpu].count[idx] += val;
+	stat->count[idx] += val;
 }
 
 static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 /*
  * per-zone information in memory controller.
  */
-
-enum mem_cgroup_zstat_index {
-	MEM_CGROUP_ZSTAT_ACTIVE,
-	MEM_CGROUP_ZSTAT_INACTIVE,
-
-	NR_MEM_CGROUP_ZSTAT,
-};
-
 struct mem_cgroup_per_zone {
 	/*
 	 * spin_lock to protect the per cgroup LRU
 	 */
 	spinlock_t		lru_lock;
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long		count[NR_MEM_CGROUP_ZSTAT];
+	struct list_head	lists[NR_LRU_LISTS];
+	unsigned long		count[NR_LRU_LISTS];
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
 };
 static struct mem_cgroup init_mem_cgroup;
 
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock. We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin). But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT	0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK	(1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK	0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-	struct list_head lru;		/* per cgroup LRU list */
-	struct page *page;
-	struct mem_cgroup *mem_cgroup;
-	int flags;
-};
-#define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE	(0x2)	/* page is active in this cgroup */
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
-	return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
-	return page_zonenum(pc->page);
-}
-
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	NR_CHARGE_TYPE,
+};
+
+/* only for here (for easy reading.) */
+#define PCGF_CACHE	(1UL << PCG_CACHE)
+#define PCGF_USED	(1UL << PCG_USED)
+#define PCGF_ACTIVE	(1UL << PCG_ACTIVE)
+#define PCGF_LOCK	(1UL << PCG_LOCK)
+#define PCGF_FILE	(1UL << PCG_FILE)
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+	PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+	PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+	PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+	0, /* FORCE */
 };
 
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
-					bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+					 struct page_cgroup *pc,
+					 bool charge)
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
 
 	VM_BUG_ON(!irqs_disabled());
-	if (flags & PAGE_CGROUP_FLAG_CACHE)
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+
+	cpustat = &stat->cpustat[smp_processor_id()];
+	if (PageCgroupCache(pc))
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
 	else
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
 
 	if (charge)
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
 	else
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
 }
 
 static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
-					enum mem_cgroup_zstat_index idx)
+					enum lru_list idx)
 {
 	int nid, zid;
 	struct mem_cgroup_per_zone *mz;
@@ -262,85 +236,77 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 				struct mem_cgroup, css);
 }
 
-static inline int page_cgroup_locked(struct page *page)
-{
-	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
-	VM_BUG_ON(!page_cgroup_locked(page));
-	page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
-	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
-	return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
-	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int lru = LRU_BASE;
+
+	if (PageCgroupUnevictable(pc))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageCgroupActive(pc))
+			lru += LRU_ACTIVE;
+		if (PageCgroupFile(pc))
+			lru += LRU_FILE;
+	}
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
 	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int lru = LRU_BASE;
 
-	if (!to) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-		list_add(&pc->lru, &mz->inactive_list);
-	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-		list_add(&pc->lru, &mz->active_list);
+	if (PageCgroupUnevictable(pc))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageCgroupActive(pc))
+			lru += LRU_ACTIVE;
+		if (PageCgroupFile(pc))
+			lru += LRU_FILE;
 	}
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_add(&pc->lru, &mz->lists[lru]);
+
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+	int active = PageCgroupActive(pc);
+	int file = PageCgroupFile(pc);
+	int unevictable = PageCgroupUnevictable(pc);
+	enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+				(LRU_FILE * !!file + !!active);
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	if (lru == from)
+		return;
 
-	if (active) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-		pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->active_list);
+	MEM_CGROUP_ZSTAT(mz, from) -= 1;
+	/*
+	 * However this is done under mz->lru_lock, another flags, which
+	 * are not related to LRU, will be modified from out-of-lock.
+	 * We have to use atomic set/clear flags.
+	 */
+	if (is_unevictable_lru(lru)) {
+		ClearPageCgroupActive(pc);
+		SetPageCgroupUnevictable(pc);
 	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->inactive_list);
+		if (is_active_lru(lru))
+			SetPageCgroupActive(pc);
+		else
+			ClearPageCgroupActive(pc);
+		ClearPageCgroupUnevictable(pc);
 	}
+
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_move(&pc->lru, &mz->lists[lru]);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -356,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
@@ -372,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	 * safely get to page_cgroup without it, so just try_lock it:
 	 * mem_cgroup_isolate_pages allows for page left on wrong list.
 	 */
-	if (!try_lock_page_cgroup(page))
+	pc = lookup_page_cgroup(page);
+	if (!trylock_page_cgroup(pc))
 		return;
-
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	if (pc && PageCgroupUsed(pc)) {
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_move_lists(pc, active);
+		__mem_cgroup_move_lists(pc, lru);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 }
 
 /*
@@ -403,21 +368,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 }
 
 /*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-	unsigned long active, inactive;
-	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
-	return (long) (active / (inactive + 1));
-}
-
-/*
  * prev_priority control...this will be used in memory reclaim path.
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -444,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  * (see include/linux/mmzone.h)
  */
 
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru)
 {
-	long nr_active;
+	long nr_pages;
 	int nid = zone->zone_pgdat->node_id;
 	int zid = zone_idx(zone);
 	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
-	nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
-	return (nr_active >> priority);
-}
+	nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
 
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	long nr_inactive;
-	int nid = zone->zone_pgdat->node_id;
-	int zid = zone_idx(zone);
-	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
-
-	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-	return (nr_inactive >> priority);
+	return (nr_pages >> priority);
 }
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -473,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
 	unsigned long nr_taken = 0;
 	struct page *page;
@@ -484,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
+	int lru = LRU_FILE * !!file + !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	if (active)
-		src = &mz->active_list;
-	else
-		src = &mz->inactive_list;
-
+	src = &mz->lists[lru];
 
 	spin_lock(&mz->lru_lock);
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
 		if (scan >= nr_to_scan)
 			break;
+		if (unlikely(!PageCgroupUsed(pc)))
+			continue;
 		page = pc->page;
 
 		if (unlikely(!PageLRU(page)))
 			continue;
 
-		if (PageActive(page) && !active) {
-			__mem_cgroup_move_lists(pc, true);
-			continue;
-		}
-		if (!PageActive(page) && active) {
-			__mem_cgroup_move_lists(pc, false);
+		/*
+		 * TODO: play better with lumpy reclaim, grabbing anything.
+		 */
+		if (PageUnevictable(page) ||
+		    (PageActive(page) && !active) ||
+		    (!PageActive(page) && active)) {
+			__mem_cgroup_move_lists(pc, page_lru(page));
 			continue;
 		}
 
 		scan++;
 		list_move(&pc->lru, &pc_list);
 
-		if (__isolate_lru_page(page, mode) == 0) {
+		if (__isolate_lru_page(page, mode, file) == 0) {
 			list_move(&page->lru, dst);
 			nr_taken++;
 		}
@@ -540,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
-	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
 
-	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-	if (unlikely(pc == NULL))
-		goto err;
-
+	pc = lookup_page_cgroup(page);
+	/* can happen at boot */
+	if (unlikely(!pc))
+		return 0;
+	prefetchw(pc);
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
+
 	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem)) {
 			rcu_read_unlock();
-			kmem_cache_free(page_cgroup_cache, pc);
 			return 0;
 		}
 		/*
@@ -572,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		css_get(&memcg->css);
 	}
 
-	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto out;
 
@@ -595,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		}
 	}
 
-	pc->mem_cgroup = mem;
-	pc->page = page;
-	/*
-	 * If a page is accounted as a page cache, insert to inactive list.
-	 * If anon, insert to active list.
-	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-	else
-		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
-	lock_page_cgroup(page);
-	if (unlikely(page_get_page_cgroup(page))) {
-		unlock_page_cgroup(page);
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-		kmem_cache_free(page_cgroup_cache, pc);
+
 		goto done;
 	}
-	page_assign_page_cgroup(page, pc);
+	pc->mem_cgroup = mem;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
+	pc->flags = pcg_default_flags[ctype];
 
 	mz = page_cgroup_zoneinfo(pc);
+
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	unlock_page_cgroup(page);
 done:
 	return 0;
 out:
 	css_put(&mem->css);
-	kmem_cache_free(page_cgroup_cache, pc);
-err:
 	return -ENOMEM;
 }
 
@@ -635,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * If already mapped, we don't have to account.
 	 * If page cache, page->mapping has address_space.
@@ -656,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * Corner case handling. This is called from add_to_page_cache()
 	 * in usual. But some FS (shmem) precharges this page before calling it
@@ -669,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!(gfp_mask & __GFP_WAIT)) {
 		struct page_cgroup *pc;
 
-		lock_page_cgroup(page);
-		pc = page_get_page_cgroup(page);
-		if (pc) {
-			VM_BUG_ON(pc->page != page);
-			VM_BUG_ON(!pc->mem_cgroup);
-			unlock_page_cgroup(page);
+
+		pc = lookup_page_cgroup(page);
+		if (!pc)
+			return 0;
+		lock_page_cgroup(pc);
+		if (PageCgroupUsed(pc)) {
+			unlock_page_cgroup(pc);
 			return 0;
 		}
-		unlock_page_cgroup(page);
+		unlock_page_cgroup(pc);
 	}
 
 	if (unlikely(!mm))
 		mm = &init_mm;
 
-	return mem_cgroup_charge_common(page, mm, gfp_mask,
+	if (page_is_file_cache(page))
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+	else
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
+				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
 /*
@@ -704,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	/*
 	 * Check if our page_cgroup is valid
 	 */
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (unlikely(!pc))
-		goto unlock;
-
-	VM_BUG_ON(pc->page != page);
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc || !PageCgroupUsed(pc)))
+		return;
 
-	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
-		|| page_mapped(page)))
-		goto unlock;
+	lock_page_cgroup(pc);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+	     || !PageCgroupUsed(pc)) {
+		/* This happens at race in zap_pte_range() and do_swap_page()*/
+		unlock_page_cgroup(pc);
+		return;
+	}
+	ClearPageCgroupUsed(pc);
+	mem = pc->mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_remove_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
-
-	mem = pc->mem_cgroup;
 	res_counter_uncharge(&mem->res, PAGE_SIZE);
 	css_put(&mem->css);
 
-	kmem_cache_free(page_cgroup_cache, pc);
 	return;
-unlock:
-	unlock_page_cgroup(page);
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
 {
+	/* early check. */
+	if (page_mapped(page))
+		return;
+	if (page->mapping && !PageAnon(page))
+		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
+	VM_BUG_ON(page->mapping);
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
@@ -758,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		if (PageCgroupCache(pc)) {
+			if (page_is_file_cache(page))
+				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+			else
+				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		}
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 	if (mem) {
 		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
 						ctype, mem);
@@ -791,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
 	 */
 	if (!newpage->mapping)
 		__mem_cgroup_uncharge_common(newpage,
-				 MEM_CGROUP_CHARGE_TYPE_FORCE);
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
 	else if (PageAnon(newpage))
 		mem_cgroup_uncharge_page(newpage);
 }
@@ -863,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
 #define FORCE_UNCHARGE_BATCH	(128)
 static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 			    struct mem_cgroup_per_zone *mz,
-			    int active)
+			    enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct page *page;
@@ -871,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 	unsigned long flags;
 	struct list_head *list;
 
-	if (active)
-		list = &mz->active_list;
-	else
-		list = &mz->inactive_list;
+	list = &mz->lists[lru];
 
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	while (!list_empty(list)) {
 		pc = list_entry(list->prev, struct page_cgroup, lru);
 		page = pc->page;
+		if (!PageCgroupUsed(pc))
+			break;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 		/*
@@ -894,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 				count = FORCE_UNCHARGE_BATCH;
 				cond_resched();
 			}
-		} else
-			cond_resched();
+		} else {
+			spin_lock_irqsave(&mz->lru_lock, flags);
+			break;
+		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -919,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	while (mem->res.usage > 0) {
 		if (atomic_read(&mem->css.cgroup->count) > 0)
 			goto out;
+		/* This is for making all *used* pages to be on LRU. */
+		lru_add_drain_all();
 		for_each_node_state(node, N_POSSIBLE)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				struct mem_cgroup_per_zone *mz;
+				enum lru_list l;
 				mz = mem_cgroup_zoneinfo(mem, node, zid);
-				/* drop all page_cgroup in active_list */
-				mem_cgroup_force_empty_list(mem, mz, 1);
-				/* drop all page_cgroup in inactive_list */
-				mem_cgroup_force_empty_list(mem, mz, 0);
+				for_each_lru(l)
+					mem_cgroup_force_empty_list(mem, mz, l);
 			}
+		cond_resched();
 	}
 	ret = 0;
 out:
@@ -1012,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 	/* showing # of active pages */
 	{
-		unsigned long active, inactive;
-
-		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_INACTIVE);
-		active = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_ACTIVE);
-		cb->fill(cb, "active", (active) * PAGE_SIZE);
-		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+		unsigned long active_anon, inactive_anon;
+		unsigned long active_file, inactive_file;
+		unsigned long unevictable;
+
+		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_ANON);
+		active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_ANON);
+		inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_FILE);
+		active_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_FILE);
+		unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_UNEVICTABLE);
+
+		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+		cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
 	}
 	return 0;
 }
@@ -1062,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup_per_zone *mz;
+	enum lru_list l;
 	int zone, tmp = node;
 	/*
 	 * This routine is called against possible nodes.
@@ -1082,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		INIT_LIST_HEAD(&mz->active_list);
-		INIT_LIST_HEAD(&mz->inactive_list);
 		spin_lock_init(&mz->lru_lock);
+		for_each_lru(l)
+			INIT_LIST_HEAD(&mz->lists[l]);
 	}
 	return 0;
 }
@@ -1125,7 +1089,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
 	if (unlikely((cont->parent) == NULL)) {
 		mem = &init_mem_cgroup;
-		page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
 	} else {
 		mem = mem_cgroup_alloc();
 		if (!mem)
