diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 176 |
1 files changed, 126 insertions, 50 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index e97a0e5aea91..a1f7772a01fc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -47,9 +47,11 @@ static sector_t map_swap_entry(swp_entry_t, struct block_device**); | |||
47 | 47 | ||
48 | DEFINE_SPINLOCK(swap_lock); | 48 | DEFINE_SPINLOCK(swap_lock); |
49 | static unsigned int nr_swapfiles; | 49 | static unsigned int nr_swapfiles; |
50 | long nr_swap_pages; | 50 | atomic_long_t nr_swap_pages; |
51 | /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ | ||
51 | long total_swap_pages; | 52 | long total_swap_pages; |
52 | static int least_priority; | 53 | static int least_priority; |
54 | static atomic_t highest_priority_index = ATOMIC_INIT(-1); | ||
53 | 55 | ||
54 | static const char Bad_file[] = "Bad swap file entry "; | 56 | static const char Bad_file[] = "Bad swap file entry "; |
55 | static const char Unused_file[] = "Unused swap file entry "; | 57 | static const char Unused_file[] = "Unused swap file entry "; |
@@ -79,7 +81,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) | |||
79 | struct page *page; | 81 | struct page *page; |
80 | int ret = 0; | 82 | int ret = 0; |
81 | 83 | ||
82 | page = find_get_page(&swapper_space, entry.val); | 84 | page = find_get_page(swap_address_space(entry), entry.val); |
83 | if (!page) | 85 | if (!page) |
84 | return 0; | 86 | return 0; |
85 | /* | 87 | /* |
@@ -223,7 +225,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
223 | si->lowest_alloc = si->max; | 225 | si->lowest_alloc = si->max; |
224 | si->highest_alloc = 0; | 226 | si->highest_alloc = 0; |
225 | } | 227 | } |
226 | spin_unlock(&swap_lock); | 228 | spin_unlock(&si->lock); |
227 | 229 | ||
228 | /* | 230 | /* |
229 | * If seek is expensive, start searching for new cluster from | 231 | * If seek is expensive, start searching for new cluster from |
@@ -242,7 +244,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
242 | if (si->swap_map[offset]) | 244 | if (si->swap_map[offset]) |
243 | last_in_cluster = offset + SWAPFILE_CLUSTER; | 245 | last_in_cluster = offset + SWAPFILE_CLUSTER; |
244 | else if (offset == last_in_cluster) { | 246 | else if (offset == last_in_cluster) { |
245 | spin_lock(&swap_lock); | 247 | spin_lock(&si->lock); |
246 | offset -= SWAPFILE_CLUSTER - 1; | 248 | offset -= SWAPFILE_CLUSTER - 1; |
247 | si->cluster_next = offset; | 249 | si->cluster_next = offset; |
248 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 250 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
@@ -263,7 +265,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
263 | if (si->swap_map[offset]) | 265 | if (si->swap_map[offset]) |
264 | last_in_cluster = offset + SWAPFILE_CLUSTER; | 266 | last_in_cluster = offset + SWAPFILE_CLUSTER; |
265 | else if (offset == last_in_cluster) { | 267 | else if (offset == last_in_cluster) { |
266 | spin_lock(&swap_lock); | 268 | spin_lock(&si->lock); |
267 | offset -= SWAPFILE_CLUSTER - 1; | 269 | offset -= SWAPFILE_CLUSTER - 1; |
268 | si->cluster_next = offset; | 270 | si->cluster_next = offset; |
269 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 271 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
@@ -277,7 +279,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
277 | } | 279 | } |
278 | 280 | ||
279 | offset = scan_base; | 281 | offset = scan_base; |
280 | spin_lock(&swap_lock); | 282 | spin_lock(&si->lock); |
281 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 283 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
282 | si->lowest_alloc = 0; | 284 | si->lowest_alloc = 0; |
283 | } | 285 | } |
@@ -293,9 +295,9 @@ checks: | |||
293 | /* reuse swap entry of cache-only swap if not busy. */ | 295 | /* reuse swap entry of cache-only swap if not busy. */ |
294 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { | 296 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { |
295 | int swap_was_freed; | 297 | int swap_was_freed; |
296 | spin_unlock(&swap_lock); | 298 | spin_unlock(&si->lock); |
297 | swap_was_freed = __try_to_reclaim_swap(si, offset); | 299 | swap_was_freed = __try_to_reclaim_swap(si, offset); |
298 | spin_lock(&swap_lock); | 300 | spin_lock(&si->lock); |
299 | /* entry was freed successfully, try to use this again */ | 301 | /* entry was freed successfully, try to use this again */ |
300 | if (swap_was_freed) | 302 | if (swap_was_freed) |
301 | goto checks; | 303 | goto checks; |
@@ -335,13 +337,13 @@ checks: | |||
335 | si->lowest_alloc <= last_in_cluster) | 337 | si->lowest_alloc <= last_in_cluster) |
336 | last_in_cluster = si->lowest_alloc - 1; | 338 | last_in_cluster = si->lowest_alloc - 1; |
337 | si->flags |= SWP_DISCARDING; | 339 | si->flags |= SWP_DISCARDING; |
338 | spin_unlock(&swap_lock); | 340 | spin_unlock(&si->lock); |
339 | 341 | ||
340 | if (offset < last_in_cluster) | 342 | if (offset < last_in_cluster) |
341 | discard_swap_cluster(si, offset, | 343 | discard_swap_cluster(si, offset, |
342 | last_in_cluster - offset + 1); | 344 | last_in_cluster - offset + 1); |
343 | 345 | ||
344 | spin_lock(&swap_lock); | 346 | spin_lock(&si->lock); |
345 | si->lowest_alloc = 0; | 347 | si->lowest_alloc = 0; |
346 | si->flags &= ~SWP_DISCARDING; | 348 | si->flags &= ~SWP_DISCARDING; |
347 | 349 | ||
@@ -355,10 +357,10 @@ checks: | |||
355 | * could defer that delay until swap_writepage, | 357 | * could defer that delay until swap_writepage, |
356 | * but it's easier to keep this self-contained. | 358 | * but it's easier to keep this self-contained. |
357 | */ | 359 | */ |
358 | spin_unlock(&swap_lock); | 360 | spin_unlock(&si->lock); |
359 | wait_on_bit(&si->flags, ilog2(SWP_DISCARDING), | 361 | wait_on_bit(&si->flags, ilog2(SWP_DISCARDING), |
360 | wait_for_discard, TASK_UNINTERRUPTIBLE); | 362 | wait_for_discard, TASK_UNINTERRUPTIBLE); |
361 | spin_lock(&swap_lock); | 363 | spin_lock(&si->lock); |
362 | } else { | 364 | } else { |
363 | /* | 365 | /* |
364 | * Note pages allocated by racing tasks while | 366 | * Note pages allocated by racing tasks while |
@@ -374,14 +376,14 @@ checks: | |||
374 | return offset; | 376 | return offset; |
375 | 377 | ||
376 | scan: | 378 | scan: |
377 | spin_unlock(&swap_lock); | 379 | spin_unlock(&si->lock); |
378 | while (++offset <= si->highest_bit) { | 380 | while (++offset <= si->highest_bit) { |
379 | if (!si->swap_map[offset]) { | 381 | if (!si->swap_map[offset]) { |
380 | spin_lock(&swap_lock); | 382 | spin_lock(&si->lock); |
381 | goto checks; | 383 | goto checks; |
382 | } | 384 | } |
383 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { | 385 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { |
384 | spin_lock(&swap_lock); | 386 | spin_lock(&si->lock); |
385 | goto checks; | 387 | goto checks; |
386 | } | 388 | } |
387 | if (unlikely(--latency_ration < 0)) { | 389 | if (unlikely(--latency_ration < 0)) { |
@@ -392,11 +394,11 @@ scan: | |||
392 | offset = si->lowest_bit; | 394 | offset = si->lowest_bit; |
393 | while (++offset < scan_base) { | 395 | while (++offset < scan_base) { |
394 | if (!si->swap_map[offset]) { | 396 | if (!si->swap_map[offset]) { |
395 | spin_lock(&swap_lock); | 397 | spin_lock(&si->lock); |
396 | goto checks; | 398 | goto checks; |
397 | } | 399 | } |
398 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { | 400 | if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { |
399 | spin_lock(&swap_lock); | 401 | spin_lock(&si->lock); |
400 | goto checks; | 402 | goto checks; |
401 | } | 403 | } |
402 | if (unlikely(--latency_ration < 0)) { | 404 | if (unlikely(--latency_ration < 0)) { |
@@ -404,7 +406,7 @@ scan: | |||
404 | latency_ration = LATENCY_LIMIT; | 406 | latency_ration = LATENCY_LIMIT; |
405 | } | 407 | } |
406 | } | 408 | } |
407 | spin_lock(&swap_lock); | 409 | spin_lock(&si->lock); |
408 | 410 | ||
409 | no_page: | 411 | no_page: |
410 | si->flags -= SWP_SCANNING; | 412 | si->flags -= SWP_SCANNING; |
@@ -417,13 +419,34 @@ swp_entry_t get_swap_page(void) | |||
417 | pgoff_t offset; | 419 | pgoff_t offset; |
418 | int type, next; | 420 | int type, next; |
419 | int wrapped = 0; | 421 | int wrapped = 0; |
422 | int hp_index; | ||
420 | 423 | ||
421 | spin_lock(&swap_lock); | 424 | spin_lock(&swap_lock); |
422 | if (nr_swap_pages <= 0) | 425 | if (atomic_long_read(&nr_swap_pages) <= 0) |
423 | goto noswap; | 426 | goto noswap; |
424 | nr_swap_pages--; | 427 | atomic_long_dec(&nr_swap_pages); |
425 | 428 | ||
426 | for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { | 429 | for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { |
430 | hp_index = atomic_xchg(&highest_priority_index, -1); | ||
431 | /* | ||
432 | * highest_priority_index records current highest priority swap | ||
433 | * type which just frees swap entries. If its priority is | ||
434 | * higher than that of swap_list.next swap type, we use it. It | ||
435 | * isn't protected by swap_lock, so it can be an invalid value | ||
436 | * if the corresponding swap type has been swapped off. We | |
437 | * double check the flags here. It's even possible that the swap | |
438 | * type was swapped off and on again with a different priority. | |
439 | * In such a rare case, a low priority swap type might be used, | |
440 | * but eventually the high priority swap will be used after | |
441 | * several rounds of swap. | |
442 | */ | ||
443 | if (hp_index != -1 && hp_index != type && | ||
444 | swap_info[type]->prio < swap_info[hp_index]->prio && | ||
445 | (swap_info[hp_index]->flags & SWP_WRITEOK)) { | ||
446 | type = hp_index; | ||
447 | swap_list.next = type; | ||
448 | } | ||
449 | |||
427 | si = swap_info[type]; | 450 | si = swap_info[type]; |
428 | next = si->next; | 451 | next = si->next; |
429 | if (next < 0 || | 452 | if (next < 0 || |
@@ -432,22 +455,29 @@ swp_entry_t get_swap_page(void) | |||
432 | wrapped++; | 455 | wrapped++; |
433 | } | 456 | } |
434 | 457 | ||
435 | if (!si->highest_bit) | 458 | spin_lock(&si->lock); |
459 | if (!si->highest_bit) { | ||
460 | spin_unlock(&si->lock); | ||
436 | continue; | 461 | continue; |
437 | if (!(si->flags & SWP_WRITEOK)) | 462 | } |
463 | if (!(si->flags & SWP_WRITEOK)) { | ||
464 | spin_unlock(&si->lock); | ||
438 | continue; | 465 | continue; |
466 | } | ||
439 | 467 | ||
440 | swap_list.next = next; | 468 | swap_list.next = next; |
469 | |||
470 | spin_unlock(&swap_lock); | ||
441 | /* This is called for allocating swap entry for cache */ | 471 | /* This is called for allocating swap entry for cache */ |
442 | offset = scan_swap_map(si, SWAP_HAS_CACHE); | 472 | offset = scan_swap_map(si, SWAP_HAS_CACHE); |
443 | if (offset) { | 473 | spin_unlock(&si->lock); |
444 | spin_unlock(&swap_lock); | 474 | if (offset) |
445 | return swp_entry(type, offset); | 475 | return swp_entry(type, offset); |
446 | } | 476 | spin_lock(&swap_lock); |
447 | next = swap_list.next; | 477 | next = swap_list.next; |
448 | } | 478 | } |
449 | 479 | ||
450 | nr_swap_pages++; | 480 | atomic_long_inc(&nr_swap_pages); |
451 | noswap: | 481 | noswap: |
452 | spin_unlock(&swap_lock); | 482 | spin_unlock(&swap_lock); |
453 | return (swp_entry_t) {0}; | 483 | return (swp_entry_t) {0}; |
@@ -459,19 +489,19 @@ swp_entry_t get_swap_page_of_type(int type) | |||
459 | struct swap_info_struct *si; | 489 | struct swap_info_struct *si; |
460 | pgoff_t offset; | 490 | pgoff_t offset; |
461 | 491 | ||
462 | spin_lock(&swap_lock); | ||
463 | si = swap_info[type]; | 492 | si = swap_info[type]; |
493 | spin_lock(&si->lock); | ||
464 | if (si && (si->flags & SWP_WRITEOK)) { | 494 | if (si && (si->flags & SWP_WRITEOK)) { |
465 | nr_swap_pages--; | 495 | atomic_long_dec(&nr_swap_pages); |
466 | /* This is called for allocating swap entry, not cache */ | 496 | /* This is called for allocating swap entry, not cache */ |
467 | offset = scan_swap_map(si, 1); | 497 | offset = scan_swap_map(si, 1); |
468 | if (offset) { | 498 | if (offset) { |
469 | spin_unlock(&swap_lock); | 499 | spin_unlock(&si->lock); |
470 | return swp_entry(type, offset); | 500 | return swp_entry(type, offset); |
471 | } | 501 | } |
472 | nr_swap_pages++; | 502 | atomic_long_inc(&nr_swap_pages); |
473 | } | 503 | } |
474 | spin_unlock(&swap_lock); | 504 | spin_unlock(&si->lock); |
475 | return (swp_entry_t) {0}; | 505 | return (swp_entry_t) {0}; |
476 | } | 506 | } |
477 | 507 | ||
@@ -493,7 +523,7 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) | |||
493 | goto bad_offset; | 523 | goto bad_offset; |
494 | if (!p->swap_map[offset]) | 524 | if (!p->swap_map[offset]) |
495 | goto bad_free; | 525 | goto bad_free; |
496 | spin_lock(&swap_lock); | 526 | spin_lock(&p->lock); |
497 | return p; | 527 | return p; |
498 | 528 | ||
499 | bad_free: | 529 | bad_free: |
@@ -511,6 +541,27 @@ out: | |||
511 | return NULL; | 541 | return NULL; |
512 | } | 542 | } |
513 | 543 | ||
544 | /* | ||
545 | * This swap type has just freed a swap entry; check whether it is the | |
546 | * highest priority swap type to have done so. get_swap_page() uses | |
547 | * highest_priority_index to find the highest priority swap type. The | |
548 | * swap_info_struct.lock can't protect us if there are multiple swap types | ||
549 | * active, so we use atomic_cmpxchg. | ||
550 | */ | ||
551 | static void set_highest_priority_index(int type) | ||
552 | { | ||
553 | int old_hp_index, new_hp_index; | ||
554 | |||
555 | do { | ||
556 | old_hp_index = atomic_read(&highest_priority_index); | ||
557 | if (old_hp_index != -1 && | ||
558 | swap_info[old_hp_index]->prio >= swap_info[type]->prio) | ||
559 | break; | ||
560 | new_hp_index = type; | ||
561 | } while (atomic_cmpxchg(&highest_priority_index, | ||
562 | old_hp_index, new_hp_index) != old_hp_index); | ||
563 | } | ||
564 | |||
514 | static unsigned char swap_entry_free(struct swap_info_struct *p, | 565 | static unsigned char swap_entry_free(struct swap_info_struct *p, |
515 | swp_entry_t entry, unsigned char usage) | 566 | swp_entry_t entry, unsigned char usage) |
516 | { | 567 | { |
@@ -553,10 +604,8 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, | |||
553 | p->lowest_bit = offset; | 604 | p->lowest_bit = offset; |
554 | if (offset > p->highest_bit) | 605 | if (offset > p->highest_bit) |
555 | p->highest_bit = offset; | 606 | p->highest_bit = offset; |
556 | if (swap_list.next >= 0 && | 607 | set_highest_priority_index(p->type); |
557 | p->prio > swap_info[swap_list.next]->prio) | 608 | atomic_long_inc(&nr_swap_pages); |
558 | swap_list.next = p->type; | ||
559 | nr_swap_pages++; | ||
560 | p->inuse_pages--; | 609 | p->inuse_pages--; |
561 | frontswap_invalidate_page(p->type, offset); | 610 | frontswap_invalidate_page(p->type, offset); |
562 | if (p->flags & SWP_BLKDEV) { | 611 | if (p->flags & SWP_BLKDEV) { |
@@ -581,7 +630,7 @@ void swap_free(swp_entry_t entry) | |||
581 | p = swap_info_get(entry); | 630 | p = swap_info_get(entry); |
582 | if (p) { | 631 | if (p) { |
583 | swap_entry_free(p, entry, 1); | 632 | swap_entry_free(p, entry, 1); |
584 | spin_unlock(&swap_lock); | 633 | spin_unlock(&p->lock); |
585 | } | 634 | } |
586 | } | 635 | } |
587 | 636 | ||
@@ -598,7 +647,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) | |||
598 | count = swap_entry_free(p, entry, SWAP_HAS_CACHE); | 647 | count = swap_entry_free(p, entry, SWAP_HAS_CACHE); |
599 | if (page) | 648 | if (page) |
600 | mem_cgroup_uncharge_swapcache(page, entry, count != 0); | 649 | mem_cgroup_uncharge_swapcache(page, entry, count != 0); |
601 | spin_unlock(&swap_lock); | 650 | spin_unlock(&p->lock); |
602 | } | 651 | } |
603 | } | 652 | } |
604 | 653 | ||
@@ -617,7 +666,7 @@ int page_swapcount(struct page *page) | |||
617 | p = swap_info_get(entry); | 666 | p = swap_info_get(entry); |
618 | if (p) { | 667 | if (p) { |
619 | count = swap_count(p->swap_map[swp_offset(entry)]); | 668 | count = swap_count(p->swap_map[swp_offset(entry)]); |
620 | spin_unlock(&swap_lock); | 669 | spin_unlock(&p->lock); |
621 | } | 670 | } |
622 | return count; | 671 | return count; |
623 | } | 672 | } |
@@ -699,13 +748,14 @@ int free_swap_and_cache(swp_entry_t entry) | |||
699 | p = swap_info_get(entry); | 748 | p = swap_info_get(entry); |
700 | if (p) { | 749 | if (p) { |
701 | if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { | 750 | if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { |
702 | page = find_get_page(&swapper_space, entry.val); | 751 | page = find_get_page(swap_address_space(entry), |
752 | entry.val); | ||
703 | if (page && !trylock_page(page)) { | 753 | if (page && !trylock_page(page)) { |
704 | page_cache_release(page); | 754 | page_cache_release(page); |
705 | page = NULL; | 755 | page = NULL; |
706 | } | 756 | } |
707 | } | 757 | } |
708 | spin_unlock(&swap_lock); | 758 | spin_unlock(&p->lock); |
709 | } | 759 | } |
710 | if (page) { | 760 | if (page) { |
711 | /* | 761 | /* |
@@ -803,11 +853,13 @@ unsigned int count_swap_pages(int type, int free) | |||
803 | if ((unsigned int)type < nr_swapfiles) { | 853 | if ((unsigned int)type < nr_swapfiles) { |
804 | struct swap_info_struct *sis = swap_info[type]; | 854 | struct swap_info_struct *sis = swap_info[type]; |
805 | 855 | ||
856 | spin_lock(&sis->lock); | ||
806 | if (sis->flags & SWP_WRITEOK) { | 857 | if (sis->flags & SWP_WRITEOK) { |
807 | n = sis->pages; | 858 | n = sis->pages; |
808 | if (free) | 859 | if (free) |
809 | n -= sis->inuse_pages; | 860 | n -= sis->inuse_pages; |
810 | } | 861 | } |
862 | spin_unlock(&sis->lock); | ||
811 | } | 863 | } |
812 | spin_unlock(&swap_lock); | 864 | spin_unlock(&swap_lock); |
813 | return n; | 865 | return n; |
@@ -822,11 +874,17 @@ unsigned int count_swap_pages(int type, int free) | |||
822 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | 874 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, |
823 | unsigned long addr, swp_entry_t entry, struct page *page) | 875 | unsigned long addr, swp_entry_t entry, struct page *page) |
824 | { | 876 | { |
877 | struct page *swapcache; | ||
825 | struct mem_cgroup *memcg; | 878 | struct mem_cgroup *memcg; |
826 | spinlock_t *ptl; | 879 | spinlock_t *ptl; |
827 | pte_t *pte; | 880 | pte_t *pte; |
828 | int ret = 1; | 881 | int ret = 1; |
829 | 882 | ||
883 | swapcache = page; | ||
884 | page = ksm_might_need_to_copy(page, vma, addr); | ||
885 | if (unlikely(!page)) | ||
886 | return -ENOMEM; | ||
887 | |||
830 | if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, | 888 | if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, |
831 | GFP_KERNEL, &memcg)) { | 889 | GFP_KERNEL, &memcg)) { |
832 | ret = -ENOMEM; | 890 | ret = -ENOMEM; |
@@ -845,7 +903,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
845 | get_page(page); | 903 | get_page(page); |
846 | set_pte_at(vma->vm_mm, addr, pte, | 904 | set_pte_at(vma->vm_mm, addr, pte, |
847 | pte_mkold(mk_pte(page, vma->vm_page_prot))); | 905 | pte_mkold(mk_pte(page, vma->vm_page_prot))); |
848 | page_add_anon_rmap(page, vma, addr); | 906 | if (page == swapcache) |
907 | page_add_anon_rmap(page, vma, addr); | ||
908 | else /* ksm created a completely new copy */ | ||
909 | page_add_new_anon_rmap(page, vma, addr); | ||
849 | mem_cgroup_commit_charge_swapin(page, memcg); | 910 | mem_cgroup_commit_charge_swapin(page, memcg); |
850 | swap_free(entry); | 911 | swap_free(entry); |
851 | /* | 912 | /* |
@@ -856,6 +917,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
856 | out: | 917 | out: |
857 | pte_unmap_unlock(pte, ptl); | 918 | pte_unmap_unlock(pte, ptl); |
858 | out_nolock: | 919 | out_nolock: |
920 | if (page != swapcache) { | ||
921 | unlock_page(page); | ||
922 | put_page(page); | ||
923 | } | ||
859 | return ret; | 924 | return ret; |
860 | } | 925 | } |
861 | 926 | ||
@@ -1456,7 +1521,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, | |||
1456 | p->swap_map = swap_map; | 1521 | p->swap_map = swap_map; |
1457 | frontswap_map_set(p, frontswap_map); | 1522 | frontswap_map_set(p, frontswap_map); |
1458 | p->flags |= SWP_WRITEOK; | 1523 | p->flags |= SWP_WRITEOK; |
1459 | nr_swap_pages += p->pages; | 1524 | atomic_long_add(p->pages, &nr_swap_pages); |
1460 | total_swap_pages += p->pages; | 1525 | total_swap_pages += p->pages; |
1461 | 1526 | ||
1462 | /* insert swap space into swap_list: */ | 1527 | /* insert swap space into swap_list: */ |
@@ -1478,15 +1543,19 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1478 | unsigned long *frontswap_map) | 1543 | unsigned long *frontswap_map) |
1479 | { | 1544 | { |
1480 | spin_lock(&swap_lock); | 1545 | spin_lock(&swap_lock); |
1546 | spin_lock(&p->lock); | ||
1481 | _enable_swap_info(p, prio, swap_map, frontswap_map); | 1547 | _enable_swap_info(p, prio, swap_map, frontswap_map); |
1482 | frontswap_init(p->type); | 1548 | frontswap_init(p->type); |
1549 | spin_unlock(&p->lock); | ||
1483 | spin_unlock(&swap_lock); | 1550 | spin_unlock(&swap_lock); |
1484 | } | 1551 | } |
1485 | 1552 | ||
1486 | static void reinsert_swap_info(struct swap_info_struct *p) | 1553 | static void reinsert_swap_info(struct swap_info_struct *p) |
1487 | { | 1554 | { |
1488 | spin_lock(&swap_lock); | 1555 | spin_lock(&swap_lock); |
1556 | spin_lock(&p->lock); | ||
1489 | _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); | 1557 | _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); |
1558 | spin_unlock(&p->lock); | ||
1490 | spin_unlock(&swap_lock); | 1559 | spin_unlock(&swap_lock); |
1491 | } | 1560 | } |
1492 | 1561 | ||
@@ -1546,14 +1615,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1546 | /* just pick something that's safe... */ | 1615 | /* just pick something that's safe... */ |
1547 | swap_list.next = swap_list.head; | 1616 | swap_list.next = swap_list.head; |
1548 | } | 1617 | } |
1618 | spin_lock(&p->lock); | ||
1549 | if (p->prio < 0) { | 1619 | if (p->prio < 0) { |
1550 | for (i = p->next; i >= 0; i = swap_info[i]->next) | 1620 | for (i = p->next; i >= 0; i = swap_info[i]->next) |
1551 | swap_info[i]->prio = p->prio--; | 1621 | swap_info[i]->prio = p->prio--; |
1552 | least_priority++; | 1622 | least_priority++; |
1553 | } | 1623 | } |
1554 | nr_swap_pages -= p->pages; | 1624 | atomic_long_sub(p->pages, &nr_swap_pages); |
1555 | total_swap_pages -= p->pages; | 1625 | total_swap_pages -= p->pages; |
1556 | p->flags &= ~SWP_WRITEOK; | 1626 | p->flags &= ~SWP_WRITEOK; |
1627 | spin_unlock(&p->lock); | ||
1557 | spin_unlock(&swap_lock); | 1628 | spin_unlock(&swap_lock); |
1558 | 1629 | ||
1559 | set_current_oom_origin(); | 1630 | set_current_oom_origin(); |
@@ -1572,14 +1643,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1572 | 1643 | ||
1573 | mutex_lock(&swapon_mutex); | 1644 | mutex_lock(&swapon_mutex); |
1574 | spin_lock(&swap_lock); | 1645 | spin_lock(&swap_lock); |
1646 | spin_lock(&p->lock); | ||
1575 | drain_mmlist(); | 1647 | drain_mmlist(); |
1576 | 1648 | ||
1577 | /* wait for anyone still in scan_swap_map */ | 1649 | /* wait for anyone still in scan_swap_map */ |
1578 | p->highest_bit = 0; /* cuts scans short */ | 1650 | p->highest_bit = 0; /* cuts scans short */ |
1579 | while (p->flags >= SWP_SCANNING) { | 1651 | while (p->flags >= SWP_SCANNING) { |
1652 | spin_unlock(&p->lock); | ||
1580 | spin_unlock(&swap_lock); | 1653 | spin_unlock(&swap_lock); |
1581 | schedule_timeout_uninterruptible(1); | 1654 | schedule_timeout_uninterruptible(1); |
1582 | spin_lock(&swap_lock); | 1655 | spin_lock(&swap_lock); |
1656 | spin_lock(&p->lock); | ||
1583 | } | 1657 | } |
1584 | 1658 | ||
1585 | swap_file = p->swap_file; | 1659 | swap_file = p->swap_file; |
@@ -1589,6 +1663,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1589 | p->swap_map = NULL; | 1663 | p->swap_map = NULL; |
1590 | p->flags = 0; | 1664 | p->flags = 0; |
1591 | frontswap_invalidate_area(type); | 1665 | frontswap_invalidate_area(type); |
1666 | spin_unlock(&p->lock); | ||
1592 | spin_unlock(&swap_lock); | 1667 | spin_unlock(&swap_lock); |
1593 | mutex_unlock(&swapon_mutex); | 1668 | mutex_unlock(&swapon_mutex); |
1594 | vfree(swap_map); | 1669 | vfree(swap_map); |
@@ -1699,7 +1774,7 @@ static int swap_show(struct seq_file *swap, void *v) | |||
1699 | len = seq_path(swap, &file->f_path, " \t\n\\"); | 1774 | len = seq_path(swap, &file->f_path, " \t\n\\"); |
1700 | seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", | 1775 | seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", |
1701 | len < 40 ? 40 - len : 1, " ", | 1776 | len < 40 ? 40 - len : 1, " ", |
1702 | S_ISBLK(file->f_path.dentry->d_inode->i_mode) ? | 1777 | S_ISBLK(file_inode(file)->i_mode) ? |
1703 | "partition" : "file\t", | 1778 | "partition" : "file\t", |
1704 | si->pages << (PAGE_SHIFT - 10), | 1779 | si->pages << (PAGE_SHIFT - 10), |
1705 | si->inuse_pages << (PAGE_SHIFT - 10), | 1780 | si->inuse_pages << (PAGE_SHIFT - 10), |
@@ -1794,6 +1869,7 @@ static struct swap_info_struct *alloc_swap_info(void) | |||
1794 | p->flags = SWP_USED; | 1869 | p->flags = SWP_USED; |
1795 | p->next = -1; | 1870 | p->next = -1; |
1796 | spin_unlock(&swap_lock); | 1871 | spin_unlock(&swap_lock); |
1872 | spin_lock_init(&p->lock); | ||
1797 | 1873 | ||
1798 | return p; | 1874 | return p; |
1799 | } | 1875 | } |
@@ -2116,7 +2192,7 @@ void si_swapinfo(struct sysinfo *val) | |||
2116 | if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) | 2192 | if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) |
2117 | nr_to_be_unused += si->inuse_pages; | 2193 | nr_to_be_unused += si->inuse_pages; |
2118 | } | 2194 | } |
2119 | val->freeswap = nr_swap_pages + nr_to_be_unused; | 2195 | val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; |
2120 | val->totalswap = total_swap_pages + nr_to_be_unused; | 2196 | val->totalswap = total_swap_pages + nr_to_be_unused; |
2121 | spin_unlock(&swap_lock); | 2197 | spin_unlock(&swap_lock); |
2122 | } | 2198 | } |
@@ -2149,7 +2225,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) | |||
2149 | p = swap_info[type]; | 2225 | p = swap_info[type]; |
2150 | offset = swp_offset(entry); | 2226 | offset = swp_offset(entry); |
2151 | 2227 | ||
2152 | spin_lock(&swap_lock); | 2228 | spin_lock(&p->lock); |
2153 | if (unlikely(offset >= p->max)) | 2229 | if (unlikely(offset >= p->max)) |
2154 | goto unlock_out; | 2230 | goto unlock_out; |
2155 | 2231 | ||
@@ -2184,7 +2260,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) | |||
2184 | p->swap_map[offset] = count | has_cache; | 2260 | p->swap_map[offset] = count | has_cache; |
2185 | 2261 | ||
2186 | unlock_out: | 2262 | unlock_out: |
2187 | spin_unlock(&swap_lock); | 2263 | spin_unlock(&p->lock); |
2188 | out: | 2264 | out: |
2189 | return err; | 2265 | return err; |
2190 | 2266 | ||
@@ -2309,7 +2385,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) | |||
2309 | } | 2385 | } |
2310 | 2386 | ||
2311 | if (!page) { | 2387 | if (!page) { |
2312 | spin_unlock(&swap_lock); | 2388 | spin_unlock(&si->lock); |
2313 | return -ENOMEM; | 2389 | return -ENOMEM; |
2314 | } | 2390 | } |
2315 | 2391 | ||
@@ -2357,7 +2433,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) | |||
2357 | list_add_tail(&page->lru, &head->lru); | 2433 | list_add_tail(&page->lru, &head->lru); |
2358 | page = NULL; /* now it's attached, don't free it */ | 2434 | page = NULL; /* now it's attached, don't free it */ |
2359 | out: | 2435 | out: |
2360 | spin_unlock(&swap_lock); | 2436 | spin_unlock(&si->lock); |
2361 | outer: | 2437 | outer: |
2362 | if (page) | 2438 | if (page) |
2363 | __free_page(page); | 2439 | __free_page(page); |