Diffstat (limited to 'mm/ksm.c')
 mm/ksm.c | 144 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 98 insertions(+), 46 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -32,6 +32,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -347,6 +348,8 @@ static void break_cow(struct mm_struct *mm, unsigned long addr)
 	struct vm_area_struct *vma;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
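
Every hunk below repeats this pattern: test ksm_test_exit() right after taking mmap_sem for read, and bail out if the mm is being torn down. For reference, the helper this patch leans on is defined in include/linux/ksm.h by the same series, roughly as follows (a sketch quoted for context, not part of this diff):

	/* True once exit_mmap() has begun: no mm_users remain.  The
	 * mm_struct itself stays valid because __ksm_enter() now holds
	 * an mm_count reference (see the __ksm_enter hunk below). */
	static inline int ksm_test_exit(struct mm_struct *mm)
	{
		return atomic_read(&mm->mm_users) == 0;
	}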
@@ -365,6 +368,8 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 	struct page *page;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -439,11 +444,11 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
 		/*
-		 * ksm_thread can and must skip the rb_erase, because
+		 * Usually ksmd can and must skip the rb_erase, because
 		 * root_unstable_tree was already reset to RB_ROOT.
-		 * But __ksm_exit has to be careful: do the rb_erase
-		 * if it's interrupting a scan, and this rmap_item was
-		 * inserted by this scan rather than left from before.
+		 * But be careful when an mm is exiting: do the rb_erase
+		 * if this rmap_item was inserted by this scan, rather
+		 * than left over from before.
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
 		BUG_ON(age > 1);
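
The age check above relies on unsigned-char wraparound: the low bits of rmap_item->address stash the seqnr current when the item entered the unstable tree, so the subtraction is effectively modulo 256. A minimal userspace illustration of the arithmetic (hypothetical values, not kernel code):

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long seqnr = 256;	/* current scan number (wrapped) */
		unsigned long stored = 255;	/* seqnr stashed at insertion    */

		/* Same cast as in remove_rmap_item_from_tree(): 0 means
		 * inserted by this scan, 1 means left over from before;
		 * anything else would trip the BUG_ON(age > 1). */
		unsigned char age = (unsigned char)(seqnr - stored);
		assert(age == 1);
		printf("age = %u\n", age);
		return 0;
	}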
@@ -491,6 +496,8 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 	int err = 0;
 
 	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (ksm_test_exit(vma->vm_mm))
+			break;
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
@@ -507,34 +514,50 @@ static int unmerge_and_remove_all_rmap_items(void)
 	int err = 0;
 
 	spin_lock(&ksm_mmlist_lock);
-	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
 						struct mm_slot, mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	while (mm_slot != &ksm_mm_head) {
+	for (mm_slot = ksm_scan.mm_slot;
+			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (ksm_test_exit(mm))
+				break;
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
 						vma->vm_start, vma->vm_end);
-			if (err) {
-				up_read(&mm->mmap_sem);
-				goto out;
-			}
+			if (err)
+				goto error;
 		}
+
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		up_read(&mm->mmap_sem);
 
 		spin_lock(&ksm_mmlist_lock);
-		mm_slot = list_entry(mm_slot->mm_list.next,
+		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
 						struct mm_slot, mm_list);
-		spin_unlock(&ksm_mmlist_lock);
+		if (ksm_test_exit(mm)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			spin_unlock(&ksm_mmlist_lock);
+
+			free_mm_slot(mm_slot);
+			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			up_read(&mm->mmap_sem);
+			mmdrop(mm);
+		} else {
+			spin_unlock(&ksm_mmlist_lock);
+			up_read(&mm->mmap_sem);
+		}
 	}
 
 	ksm_scan.seqnr = 0;
-out:
+	return 0;
+
+error:
+	up_read(&mm->mmap_sem);
 	spin_lock(&ksm_mmlist_lock);
 	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
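
This new exit branch lets unmerge_and_remove_all_rmap_items() free a dead mm's slot itself rather than deferring to __ksm_exit. For orientation, the bookkeeping structure being unlinked is defined near the top of mm/ksm.c at this point in history, roughly as (quoted for context, not part of this diff):

	/* One entry per mm registered with ksmd: hashed via link for
	 * get_mm_slot(), chained on ksmd's scan list via mm_list, with
	 * the mm's rmap_items hanging off rmap_list. */
	struct mm_slot {
		struct hlist_node link;
		struct list_head mm_list;
		struct list_head rmap_list;
		struct mm_struct *mm;
	};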
@@ -755,6 +778,9 @@ static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
 	int err = -EFAULT;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1))
+		goto out;
+
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1)
 		goto out;
@@ -796,6 +822,10 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 		return err;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1)) {
+		up_read(&mm1->mmap_sem);
+		goto out;
+	}
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
 		up_read(&mm1->mmap_sem);
@@ -1174,7 +1204,12 @@ next_mm:
 
 	mm = slot->mm;
 	down_read(&mm->mmap_sem);
-	for (vma = find_vma(mm, ksm_scan.address); vma; vma = vma->vm_next) {
+	if (ksm_test_exit(mm))
+		vma = NULL;
+	else
+		vma = find_vma(mm, ksm_scan.address);
+
+	for (; vma; vma = vma->vm_next) {
 		if (!(vma->vm_flags & VM_MERGEABLE))
 			continue;
 		if (ksm_scan.address < vma->vm_start)
@@ -1183,6 +1218,8 @@ next_mm:
 			ksm_scan.address = vma->vm_end;
 
 		while (ksm_scan.address < vma->vm_end) {
+			if (ksm_test_exit(mm))
+				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
 			if (*page && PageAnon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
@@ -1205,6 +1242,11 @@ next_mm:
 		}
 	}
 
+	if (ksm_test_exit(mm)) {
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(&slot->rmap_list,
+						struct rmap_item, link);
+	}
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
@@ -1219,24 +1261,29 @@ next_mm:
 		 * We've completed a full scan of all vmas, holding mmap_sem
 		 * throughout, and found no VM_MERGEABLE: so do the same as
 		 * __ksm_exit does to remove this mm from all our lists now.
+		 * This applies either when cleaning up after __ksm_exit
+		 * (but beware: we can reach here even before __ksm_exit),
+		 * or when all VM_MERGEABLE areas have been unmapped (and
+		 * mmap_sem then protects against race with MADV_MERGEABLE).
 		 */
 		hlist_del(&slot->link);
 		list_del(&slot->mm_list);
+		spin_unlock(&ksm_mmlist_lock);
+
 		free_mm_slot(slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		up_read(&mm->mmap_sem);
+		mmdrop(mm);
+	} else {
+		spin_unlock(&ksm_mmlist_lock);
+		up_read(&mm->mmap_sem);
 	}
-	spin_unlock(&ksm_mmlist_lock);
-	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
 	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
-	/*
-	 * Bump seqnr here rather than at top, so that __ksm_exit
-	 * can skip rb_erase on unstable tree until we run again.
-	 */
 	ksm_scan.seqnr++;
 	return NULL;
 }
@@ -1361,6 +1408,7 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	atomic_inc(&mm->mm_count);
 
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
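
This one-line atomic_inc(&mm->mm_count) underpins the rest of the patch: it pins the mm_struct itself (not mm_users), so the structure outlives exit_mmap() while its mm_slot remains on ksmd's list, and every ksm_test_exit() above dereferences valid memory. Each slot-freeing path must therefore pair with mmdrop(), as the earlier hunks do. Schematically (a lifetime sketch, not runnable kernel code):

	atomic_inc(&mm->mm_count);	/* __ksm_enter: pin the mm_struct */
	/* ... task exits; mm_users falls to 0; ksm_test_exit() goes true ... */
	free_mm_slot(mm_slot);		/* ksmd or __ksm_exit unlinks it  */
	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
	mmdrop(mm);			/* drop the pin; mm may now be freed */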
@@ -1368,41 +1416,45 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end)
 {
 	struct mm_slot *mm_slot;
+	int easy_to_free = 0;
 
 	/*
-	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
-	 * but we do need to exclude ksmd and other exiters while we modify
-	 * the various lists and trees.
+	 * This process is exiting: if it's straightforward (as is the
+	 * case when ksmd was never running), free mm_slot immediately.
+	 * But if it's at the cursor or has rmap_items linked to it, use
+	 * mmap_sem to synchronize with any break_cows before pagetables
+	 * are freed, and leave the mm_slot on the list for ksmd to free.
+	 * Beware: ksm may already have noticed it exiting and freed the slot.
 	 */
-	mutex_lock(&ksm_thread_mutex);
+
 	spin_lock(&ksm_mmlist_lock);
 	mm_slot = get_mm_slot(mm);
-	if (!list_empty(&mm_slot->rmap_list)) {
-		spin_unlock(&ksm_mmlist_lock);
-		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		spin_lock(&ksm_mmlist_lock);
-	}
-
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
-	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
+	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
+		if (list_empty(&mm_slot->rmap_list)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			easy_to_free = 1;
+		} else {
+			list_move(&mm_slot->mm_list,
+				  &ksm_scan.mm_slot->mm_list);
+		}
+	}
 	spin_unlock(&ksm_mmlist_lock);
 
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-	mutex_unlock(&ksm_thread_mutex);
+	if (easy_to_free) {
+		free_mm_slot(mm_slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		mmdrop(mm);
+	} else if (mm_slot) {
+		tlb_finish_mmu(*tlbp, 0, end);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+		*tlbp = tlb_gather_mmu(mm, 1);
+	}
 }
 
 #define KSM_ATTR_RO(_name) \
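
In the hard case (the slot is at ksmd's cursor, or still has rmap_items), __ksm_exit() now flushes the pending mmu_gather and takes mmap_sem for write purely as a barrier: any break_cow() racing in under the read side has completed before the caller goes on to free pagetables, after which a fresh gather is started. The caller side is presumably wired up like this (a sketch of the ksm.h wrapper and its call site assumed by this patch, not shown in this diff):

	/* include/linux/ksm.h: only mms that ran __ksm_enter() pay the cost. */
	static inline void ksm_exit(struct mm_struct *mm,
				    struct mmu_gather **tlbp, unsigned long end)
	{
		if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
			__ksm_exit(mm, tlbp, end);
	}

	/* exit_mmap() would call ksm_exit(mm, &tlb, end) after unmap_vmas()
	 * and before free_pgtables(), passing its live mmu_gather. */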