Diffstat (limited to 'mm/ksm.c')
 mm/ksm.c | 144
 1 file changed, 98 insertions(+), 46 deletions(-)
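The ksm_test_exit() checks added throughout this diff are not defined in mm/ksm.c itself (the diffstat above is limited to this file; the helper sits with the other KSM declarations in include/linux/ksm.h). As a minimal sketch of the idea, assuming the exit test simply observes that the address space has no users left:

    /*
     * Sketch (assumption) of the helper as it would appear in
     * include/linux/ksm.h: once mm_users has dropped to zero the mm is
     * inside exit_mmap(), so ksmd and the merging paths must back off
     * rather than touch pagetables that are about to be freed.
     */
    #include <linux/mm_types.h>	/* struct mm_struct and its mm_users count */

    static inline int ksm_test_exit(struct mm_struct *mm)
    {
    	return atomic_read(&mm->mm_users) == 0;
    }

Read together with the atomic_inc(&mm->mm_count) added to __ksm_enter() and the mmdrop() calls on each freeing path below, this lets ksm hold a reference to the mm_struct itself while mm_users tells it when to stop working on the address space.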
diff --git a/mm/ksm.c b/mm/ksm.c
index 7e4d255dadc0..722e3f2a8dc5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -32,6 +32,7 @@
 #include &lt;linux/mmu_notifier.h&gt;
 #include &lt;linux/ksm.h&gt;
 
+#include &lt;asm/tlb.h&gt;
 #include &lt;asm/tlbflush.h&gt;
 
 /*
@@ -347,6 +348,8 @@ static void break_cow(struct mm_struct *mm, unsigned long addr)
 	struct vm_area_struct *vma;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -365,6 +368,8 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 	struct page *page;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -439,11 +444,11 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
 		/*
-		 * ksm_thread can and must skip the rb_erase, because
+		 * Usually ksmd can and must skip the rb_erase, because
 		 * root_unstable_tree was already reset to RB_ROOT.
-		 * But __ksm_exit has to be careful: do the rb_erase
-		 * if it's interrupting a scan, and this rmap_item was
-		 * inserted by this scan rather than left from before.
+		 * But be careful when an mm is exiting: do the rb_erase
+		 * if this rmap_item was inserted by this scan, rather
+		 * than left over from before.
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
 		BUG_ON(age > 1);
@@ -491,6 +496,8 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 	int err = 0;
 
 	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (ksm_test_exit(vma->vm_mm))
+			break;
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
@@ -507,34 +514,50 @@ static int unmerge_and_remove_all_rmap_items(void)
 	int err = 0;
 
 	spin_lock(&ksm_mmlist_lock);
-	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
 						struct mm_slot, mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	while (mm_slot != &ksm_mm_head) {
+	for (mm_slot = ksm_scan.mm_slot;
+			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (ksm_test_exit(mm))
+				break;
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
 						vma->vm_start, vma->vm_end);
-			if (err) {
-				up_read(&mm->mmap_sem);
-				goto out;
-			}
+			if (err)
+				goto error;
 		}
+
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		up_read(&mm->mmap_sem);
 
 		spin_lock(&ksm_mmlist_lock);
-		mm_slot = list_entry(mm_slot->mm_list.next,
+		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
 						struct mm_slot, mm_list);
-		spin_unlock(&ksm_mmlist_lock);
+		if (ksm_test_exit(mm)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			spin_unlock(&ksm_mmlist_lock);
+
+			free_mm_slot(mm_slot);
+			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			up_read(&mm->mmap_sem);
+			mmdrop(mm);
+		} else {
+			spin_unlock(&ksm_mmlist_lock);
+			up_read(&mm->mmap_sem);
+		}
 	}
 
 	ksm_scan.seqnr = 0;
-out:
+	return 0;
+
+error:
+	up_read(&mm->mmap_sem);
 	spin_lock(&ksm_mmlist_lock);
 	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
@@ -755,6 +778,9 @@ static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
 	int err = -EFAULT;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1))
+		goto out;
+
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1)
 		goto out;
@@ -796,6 +822,10 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 		return err;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1)) {
+		up_read(&mm1->mmap_sem);
+		goto out;
+	}
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
 		up_read(&mm1->mmap_sem);
@@ -1174,7 +1204,12 @@ next_mm:
 
 	mm = slot->mm;
 	down_read(&mm->mmap_sem);
-	for (vma = find_vma(mm, ksm_scan.address); vma; vma = vma->vm_next) {
+	if (ksm_test_exit(mm))
+		vma = NULL;
+	else
+		vma = find_vma(mm, ksm_scan.address);
+
+	for (; vma; vma = vma->vm_next) {
 		if (!(vma->vm_flags & VM_MERGEABLE))
 			continue;
 		if (ksm_scan.address < vma->vm_start)
@@ -1183,6 +1218,8 @@ next_mm:
 			ksm_scan.address = vma->vm_end;
 
 		while (ksm_scan.address < vma->vm_end) {
+			if (ksm_test_exit(mm))
+				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
 			if (*page && PageAnon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
@@ -1205,6 +1242,11 @@ next_mm:
 		}
 	}
 
+	if (ksm_test_exit(mm)) {
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(&slot->rmap_list,
+						struct rmap_item, link);
+	}
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
@@ -1219,24 +1261,29 @@ next_mm:
 		 * We've completed a full scan of all vmas, holding mmap_sem
 		 * throughout, and found no VM_MERGEABLE: so do the same as
 		 * __ksm_exit does to remove this mm from all our lists now.
+		 * This applies either when cleaning up after __ksm_exit
+		 * (but beware: we can reach here even before __ksm_exit),
+		 * or when all VM_MERGEABLE areas have been unmapped (and
+		 * mmap_sem then protects against race with MADV_MERGEABLE).
 		 */
 		hlist_del(&slot->link);
 		list_del(&slot->mm_list);
+		spin_unlock(&ksm_mmlist_lock);
+
 		free_mm_slot(slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		up_read(&mm->mmap_sem);
+		mmdrop(mm);
+	} else {
+		spin_unlock(&ksm_mmlist_lock);
+		up_read(&mm->mmap_sem);
 	}
-	spin_unlock(&ksm_mmlist_lock);
-	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
 	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
-	/*
-	 * Bump seqnr here rather than at top, so that __ksm_exit
-	 * can skip rb_erase on unstable tree until we run again.
-	 */
 	ksm_scan.seqnr++;
 	return NULL;
 }
@@ -1361,6 +1408,7 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	atomic_inc(&mm->mm_count);
 
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
@@ -1368,41 +1416,45 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end)
 {
 	struct mm_slot *mm_slot;
+	int easy_to_free = 0;
 
 	/*
-	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
-	 * but we do need to exclude ksmd and other exiters while we modify
-	 * the various lists and trees.
+	 * This process is exiting: if it's straightforward (as is the
+	 * case when ksmd was never running), free mm_slot immediately.
+	 * But if it's at the cursor or has rmap_items linked to it, use
+	 * mmap_sem to synchronize with any break_cows before pagetables
+	 * are freed, and leave the mm_slot on the list for ksmd to free.
+	 * Beware: ksm may already have noticed it exiting and freed the slot.
 	 */
-	mutex_lock(&ksm_thread_mutex);
+
 	spin_lock(&ksm_mmlist_lock);
 	mm_slot = get_mm_slot(mm);
-	if (!list_empty(&mm_slot->rmap_list)) {
-		spin_unlock(&ksm_mmlist_lock);
-		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		spin_lock(&ksm_mmlist_lock);
-	}
-
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
+	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
+		if (list_empty(&mm_slot->rmap_list)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			easy_to_free = 1;
+		} else {
+			list_move(&mm_slot->mm_list,
+				  &ksm_scan.mm_slot->mm_list);
+		}
 	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-	mutex_unlock(&ksm_thread_mutex);
+	if (easy_to_free) {
+		free_mm_slot(mm_slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		mmdrop(mm);
+	} else if (mm_slot) {
+		tlb_finish_mmu(*tlbp, 0, end);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+		*tlbp = tlb_gather_mmu(mm, 1);
+	}
 }
 
 #define KSM_ATTR_RO(_name) \