/Documentation/acpi/

'>cgit logo index : litmus-rt.git
The LITMUS^RT kernel.Bjoern Brandenburg
aboutsummaryrefslogblamecommitdiffstats
path: root/mm/ksm.c
blob: 51573858938d1435a5b74c2c36dbd833b5c27837 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
  




                                                                          
                                        



                        
                    

                                                                   


                        

                     
                       










                           
                         
                               
                       
                      
                       
                          
                      
 
                         
                     












































                                                                              
                                                                       




                                                 
                                    






                                                       
                                                                   






                                                                             
                                     



                            


                                                       
                                            



                                
                           


   
                                                                
                                                                  
                                                                          


                                                                            


                                                                     

                  
                                    
                                                         

                                                                             
                                                           
               




                                                                          



                                                                     

                                                                    




                                                   


                                                            








                                                       
                                            


                                            
                                      
 
                                                               
                                       
 





                                                                  
                                                   
                                                   

                                                    
                                                    



                         
                                           














                                                                             



                                                           

                                                   
                               


                 


                                              







                                            
                                              





                                                     





                                                                   



                                                              
                         



                                                    









                                                                    











                                                            





                                                        
                                                                   











                                                            
                                                                   
                         








                                                             












                                                                             









                                                                         
                                                                    

                          
                    



                                                        
                                         






                                                                          





























                                                                             

 













                                                                      
                                                  
 

                                                

                                   



                                                                     
                                          
 
                                 


                                           


                               


                                                               
                                                              
                  

                                                                     
                   





                                    







                                                                   

                                           


                                                
                                 
                         
                                                               









                                                 









                                                                            
                                                  









































                                                                             
                                              


















                                                                       
  




                                                                          

                                                
                                  
 
                                              


                                                 
 
                                
                                             

                                  
 


                                             
                                           
 
                                                  
                                                
 
                                                        

                                  
                                                                       
                                                                   


                                                                        

                                                                           
                                

                                                                        
 
                                     
                                                
         
    


                                                                       
                                                               
                                                                    
 


                                                         
                                                      










                                                                          




                                                                         
   

                                                                    

                           
                    
 
                                                                   

                                              





                                                   

 



                                                   
                                                  



                                   


                                    
                                                               

                                                                         
 

                                                                              


                                                               

                                              

                                                                              

                                                                            

                                           
                 
 
                                                                         

                                            
                                                                    
                                                                         












                                                                

         
                           



                               
                                    
                                        
                                      
                   
 
                         
 


                                           
                                       
                                                   
                            







                                                               

                                   
                                              

                             
















                                                                            

                                                               




                                              
                                        




                                                                      

                                                           
                            
 
                                                   




                                                               
                                                                               











                                                                               
                                                                            
                                                          

                                        


                                                          






                                                         

                                                                    





                                                     


                                                     



                                            

                                                                      

                                          



                           
                          

                                                               
 
                                              


                            

                                    
                         
                                     
 



                                                                      


                                                        
                            

         
                        
                                             


                                                    
                                                                            
 
                               

                                       
                       


                                    

                                                                    



                   


























                                                                               

                                                                 

                                                              

                                                                    



                                                                       
                                                                       



                                  


                                                                     

                                            


                                                                            
                            

                         






                                                                   
                                
                         





                                                                       












                                                                         
 
                                                           
                                       

                                          




                                                                      
 
                          




                   

                                                                         

                                                                       
   

                                                                            
 
                                             


                                   

                                 
                         

                                                       

                         
                                                      



                                                                         

                                            
    
                               



                   


                                                                     

                                                                          
  
                                                                         

                                                                    



                                                                            
 
                
 
                                                                
                   
                                                                
                                                                         
                  

                                                                      
                   
                        
                                             
         
                                 


  
                                                              



                                                                     
                                                                            

                  
                                                         

                                                        
                                        
 


                                                                     
                            

         
                      
                                       

                        
                               
                                                                       


                                                      
 
                                                    
 

                                            
                                             

                                            
                                              
                      
                                         








                                                                 

                                                                        
   
                                                                 


                                                         
                                        

                      
                                       

                        
                               
                                                                       


                                                      
 

                                                     















                                                                              


                                          
 




                                                               
                                               

                                                 
                           


  

                                                           










                                                                          




                                                                          





                                                           
                                       

                        
                               
                                                                        
                                                               
                                              


                                    
                                                                 
                   

                                            


                                    
                                                    


                              
                                            

                                               
                                            

                                                
                                                



                                              
                                            



                                                               
                             








                                                                      
                                                               
 
                                      
                                          
                                                               
 



                                    


  



                                                                              





                                                                              
                                         
                                      
                                        
                           


                              
                                              

                                                                           

                                         
                                                                         




                                                                
                                         
                                                                               
                                           
                 
                                



                       



                                                                           






                                                  

                                                                         
                             


                                                                           




                                                                              
                            
                                                                   
 
                                         




                                                                                
                                           
 





                                                                              
                                           

                                                          

                         



                                                                    
                                                                         



                                                               

                                       
                                                             
                                         

                                              
                                                  
                                                      







                                                

                                                  















                                                                    











                                                                             





                                                                               





                                                                             

                                     
                                                      



                                 





                                                     







                                                         

                                              
                                                                             




                                                              

                                                              


                                                                              
                                                                              
                                                

                                                                              





                                                                      
                                        




                                                      

                                     
                                                      
         



                                                                       
                                                             

                                    






                                                                            



                                                                            


                                         

                                              

                                                        




                                              
         

                                                                  
                                


                                 










                                                                   
                                             
 
                                                             









                                                                 




                                                                              

                                         
                        
                                  

                                        

                                              
                                                              

                                                

                                
                                        


                                                                              
                                                             
                                                                            




                 



                                                                       
                







                                                                            
                                                                          

                                                                            




                                       




                                                              







                                                                            




                                                                 









                                           



                                  


                               


                                                            









                                                                            
                                              
                                  



                                                        


                 
                                     
 
                                
                             
 
          





                                                                              
           
 

                                    
                                                     
                                          






                                                              
         

                                      




                                                        

                                          
         

 




                                                                          








                                                                      
                                                        




                                                                     










                                                                    
                                 






                                             
      
                                                                            
                                                                
                                            
                                           
 
                                             

                                                                        
                                        













                                                                              
 
                                                                    
                                                                         


                                                           
                                               


                                 

                                
    








                                                             
                                 






                                             
      
                                                                            
                                                                
                                            
                                           
 
                                             

                                                                        
                                        














                                                                              
                                                               


                                         
                                               
         

                                
    


                   


















                                                                             
                                            

                                           
                                             

                                                                        
                                        













                                                                              
                                                               


                                         
                                               
















                                                                 

                                                                     



                             



























                                                                              




                                                                                
                   
                                                                           




















                                                                               




                                                                   








































































                                                                           

                                                                     




                                      
                                              
                                                 
                                                                  
                                                   




                                                       









                                                        









                                                                         
                                                        


                           






























                                                                          



                                        

                                 


                                  






                                                
                         









                                       



                                                                  
                              

         
                   


                                                                
                                         
                              
         


                                                                         
                         
 






                                                                             

                 
         


                        
 
                     
/*
 * Memory merging support.
 *
 * This code enables dynamic sharing of identical pages found in different
 * memory areas, even if they are not shared by fork().
 *
 * Copyright (C) 2008-2009 Red Hat, Inc.
 * Authors:
 *	Izik Eidus
 *	Andrea Arcangeli
 *	Chris Wright
 *	Hugh Dickins
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
#include <linux/freezer.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>
#include "internal.h"

/*
 * A few notes about the KSM scanning process,
 * to make it easier to understand the data structures below:
 *
 * In order to reduce excessive scanning, KSM sorts the memory pages by their
 * contents into a data structure that holds pointers to the pages' locations.
 *
 * Since the contents of the pages may change at any moment, KSM cannot just
 * insert the pages into a normal sorted tree and expect it to find anything.
 * Therefore KSM uses two data structures - the stable and the unstable tree.
 *
 * The stable tree holds pointers to all the merged pages (ksm pages), sorted
 * by their contents.  Because each such page is write-protected, a search of
 * this tree is guaranteed to work (except when pages are unmapped), and
 * therefore this tree is called the stable tree.
 *
 * In addition to the stable tree, KSM uses a second data structure called the
 * unstable tree: this tree holds pointers to pages which have been found to
 * be "unchanged for a period of time".  The unstable tree sorts these pages
 * by their contents, but since they are not write-protected, KSM cannot rely
 * upon the unstable tree to work correctly - the unstable tree is liable to
 * be corrupted as its contents are modified, and so it is called unstable.
 *
 * KSM solves this problem by several techniques:
 *
 * 1) The unstable tree is flushed every time KSM completes scanning all
 *    memory areas, and then the tree is rebuilt again from the beginning.
 * 2) KSM will only insert into the unstable tree, pages whose hash value
 *    has not changed since the previous scan of all memory areas.
 * 3) The unstable tree is a red-black tree - so its balancing is based on the
 *    colors of the nodes and not on their contents, assuring that even when
 *    the tree gets "corrupted" it won't get out of balance, so scanning time
 *    remains the same (also, searching and inserting nodes in an rbtree uses
 *    the same algorithm, so we have no overhead when we flush and rebuild).
 * 4) KSM never flushes the stable tree, which means that even if it were to
 *    take 10 attempts to find a page in the unstable tree, once it is found,
 *    it is secured in the stable tree.  (When we scan a new page, we first
 *    compare it against the stable tree, and then against the unstable tree.)
 */

/**
 * struct mm_slot - ksm information per mm that is being scanned
 * @link: link to the mm_slots hash list
 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
 * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
 * @mm: the mm that this information is valid for
 *
 * Slots are allocated from mm_slot_cache and are looked up by @mm through
 * the mm_slots_hash table (see get_mm_slot()).
 */
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};

/**
 * struct ksm_scan - cursor for scanning
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 * @rmap_list: link to the next rmap to be scanned in the rmap_list
 * @seqnr: count of completed full scans (needed when removing unstable node)
 *
 * There is only the one ksm_scan instance of this cursor structure.  It is
 * statically initialised with @mm_slot pointing at ksm_mm_head; the other
 * fields start out zeroed.
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};

/**
 * struct stable_node - node of the stable rbtree
 * @node: rb node of this ksm page in the stable tree
 * @hlist: hlist head of rmap_items using this ksm page
 * @kpfn: page frame number of this ksm page
 *
 * Nodes come from stable_node_cache.  NOTE(review): alloc_stable_node() uses
 * kmem_cache_alloc() rather than kmem_cache_zalloc(), so a fresh node is
 * presumably not zeroed - callers are expected to initialise every field.
 */
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};

/**
 * struct rmap_item - reverse mapping item for virtual addresses
 * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
 * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
 * @mm: the memory structure this rmap_item is pointing into
 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
 * @oldchecksum: previous checksum of the page at that virtual address
 * @node: rb node of this rmap_item in the unstable tree
 * @head: pointer to stable_node heading this list in the stable tree
 * @hlist: link into hlist of rmap_items hanging off that stable_node
 *
 * @node shares its storage with @head and @hlist through an anonymous union,
 * so only one of the two views is meaningful at a time.  The flag bits kept
 * in the low bits of @address are SEQNR_MASK, UNSTABLE_FLAG and STABLE_FLAG.
 * Items are allocated zeroed from rmap_item_cache (see alloc_rmap_item()).
 */
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;	/* when stable */
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};

/*
 * Flag bits kept in the low bits of rmap_item->address, next to the virtual
 * address itself.  They are stripped again by masking with PAGE_MASK (see
 * remove_node_from_stable_tree()).
 */
#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
#define STABLE_FLAG	0x200	/* is listed from the stable tree */

/* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

/*
 * Hash table of mm_slots with MM_SLOTS_HASH_HEADS buckets; the bucket for an
 * mm is selected by hash_ptr(mm, MM_SLOTS_HASH_SHIFT).
 */
#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

/* Root of the list of mm_slots; its mm_list is initialised as an empty list */
static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
/* The single scan cursor; it starts out pointing at the list root itself */
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

/* Slab caches for the KSM structures, created by ksm_slab_init() */
static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;

/* The number of page slots additionally sharing those nodes */
static unsigned long ksm_pages_sharing;

/* The number of nodes in the unstable tree */
static unsigned long ksm_pages_unshared;

/* The number of rmap_items in use: to calculate pages_volatile */
static unsigned long ksm_rmap_items;

/* Number of pages ksmd should scan in one batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

/* Values held in ksm_run; the initial state is KSM_RUN_STOP */
#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

/*
 * NOTE(review): the users of this wait queue and these locks lie outside
 * this part of the file.  Going by the names, ksm_thread_wait and
 * ksm_thread_mutex belong to the ksmd thread and ksm_mmlist_lock guards the
 * mm_slot list - confirm against the callers.
 */
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);

/*
 * KSM_KMEM_CACHE(__struct, __flags) - create the slab cache for a KSM struct.
 * The cache is named "ksm_" followed by the struct tag, its objects are sized
 * and aligned for struct __struct, __flags is passed through as the cache
 * flags, and no constructor is supplied (NULL).
 */
#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)

/*
 * Create the three KSM slab caches (rmap_item, stable_node, mm_slot).
 *
 * Returns 0 on success.  If any cache cannot be created, the caches that
 * were already created are destroyed again and -ENOMEM is returned.
 */
static int __init ksm_slab_init(void)
{
	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
	if (!rmap_item_cache)
		return -ENOMEM;

	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
	if (!stable_node_cache)
		goto err_destroy_rmap_item;

	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
	if (!mm_slot_cache)
		goto err_destroy_stable_node;

	return 0;

	/* Unwind in the reverse order of creation */
err_destroy_stable_node:
	kmem_cache_destroy(stable_node_cache);
err_destroy_rmap_item:
	kmem_cache_destroy(rmap_item_cache);
	return -ENOMEM;
}

/*
 * Destroy the three KSM slab caches, in the reverse order of their creation
 * in ksm_slab_init().
 */
static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	/* alloc_mm_slot() treats a NULL mm_slot_cache as "initialization failed" */
	mm_slot_cache = NULL;

	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
}

static inline struct rmap_item *alloc_rmap_item(void)
{
	struct rmap_item *rmap_item;

	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
	if (rmap_item)
		ksm_rmap_items++;
	return rmap_item;
}

/*
 * Release an rmap_item back to rmap_item_cache and account for it in
 * ksm_rmap_items.
 */
static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	/* debug safety: leave no stale mm pointer in the freed object */
	rmap_item->mm = NULL;
	ksm_rmap_items--;

	kmem_cache_free(rmap_item_cache, rmap_item);
}

/*
 * Allocate a stable_node from stable_node_cache with GFP_KERNEL.
 *
 * Returns the new node, or NULL on allocation failure.  Unlike the other
 * KSM allocators here, this one uses kmem_cache_alloc() rather than
 * kmem_cache_zalloc().
 */
static inline struct stable_node *alloc_stable_node(void)
{
	struct stable_node *new_node;

	new_node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
	return new_node;
}

/* Return a stable_node to stable_node_cache. */
static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}

/*
 * Allocate a zeroed mm_slot from mm_slot_cache.
 *
 * Returns NULL if the allocation fails, or if mm_slot_cache is NULL because
 * KSM initialization failed.
 */
static inline struct mm_slot *alloc_mm_slot(void)
{
	if (mm_slot_cache)
		return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);

	/* initialization failed */
	return NULL;
}

/* Return an mm_slot to mm_slot_cache. */
static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}

/*
 * Look up the mm_slot registered for @mm in the mm_slots_hash table.
 *
 * Returns the matching slot, or NULL if @mm has no slot in its bucket.
 */
static struct mm_slot *get_mm_slot(struct mm_struct *mm)
{
	struct hlist_head *head;
	struct hlist_node *pos;
	struct mm_slot *slot;

	head = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];

	hlist_for_each_entry(slot, pos, head, link) {
		if (slot->mm == mm)
			return slot;
	}

	return NULL;
}

/*
 * Record @mm in @mm_slot and add the slot at the head of the mm_slots_hash
 * bucket selected by hashing the @mm pointer.
 */
static void insert_to_mm_slots_hash(struct mm_struct *mm,
				    struct mm_slot *mm_slot)
{
	mm_slot->mm = mm;
	hlist_add_head(&mm_slot->link,
		       &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)]);
}

/*
 * Test whether STABLE_FLAG is set in rmap_item->address.  The result is the
 * masked flag bit itself (nonzero when set), not a normalised 0/1 value.
 */
static inline int in_stable_tree(struct rmap_item *rmap_item)
{
	return rmap_item->address & STABLE_FLAG;
}

/*
 * Once an mm has passed through ksm_exit(), neither ksmd nor
 * unmerge_and_remove_all_rmap_items() may touch its page tables; ksm_exit()
 * briefly takes mmap_sem, when necessary, to serialize against them.  No
 * dedicated flag is set by ksm_exit(): mm_users dropping to zero is the
 * signal to back out.  ksm_test_exit() performs that check and is used
 * throughout - in some places for correctness, in others merely to avoid
 * unnecessary work.
 */
static inline bool ksm_test_exit(struct mm_struct *mm)
{
	return !atomic_read(&mm->mm_users);
}

/*
 * We use break_ksm to break COW on a ksm page: it's a stripped down
 *
 *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
 *		put_page(page);
 *
 * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
 * in case the application has unmapped and remapped mm,addr meanwhile.
 * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
 * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
 *
 * Returns -ENOMEM if the final fault attempt reported VM_FAULT_OOM, and 0
 * in every other case (including when no page is mapped at addr).
 */
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	do {
		cond_resched();
		/* FOLL_GET: takes a page reference, dropped by put_page() below */
		page = follow_page(vma, addr, FOLL_GET);
		/* Nothing mapped at addr (or an error): nothing left to break */
		if (IS_ERR_OR_NULL(page))
			break;
		if (PageKsm(page))
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
							FAULT_FLAG_WRITE);
		else
			/* Not a ksm page: report success so that the loop ends */
			ret = VM_FAULT_WRITE;
		put_page(page);
	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
	/*
	 * We must loop because handle_mm_fault() may back out if there's
	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
	 *
	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
	 * COW has been broken, even if the vma does not permit VM_WRITE;
	 * but note that a concurrent fault might break PageKsm for us.
	 *
	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
	 * backing file, which also invalidates anonymous pages: that's
	 * okay, that truncation will have unmapped the PageKsm for us.
	 *
	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
	 * current task has TIF_MEMDIE set, and will be OOM killed on return
	 * to user; and ksmd, having no mm, would never be chosen for that.
	 *
	 * But if the mm is in a limited mem_cgroup, then the fault may fail
	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
	 * even ksmd can fail in this way - though it's usually breaking ksm
	 * just to undo a merge it made a moment before, so unlikely to oom.
	 *
	 * That's a pity: we might therefore have more kernel pages allocated
	 * than we're counting as nodes in the stable tree; but ksm_do_scan
	 * will retry to break_cow on each pass, so should recover the page
	 * in due course.  The important thing is to not let VM_MERGEABLE
	 * be cleared while any such pages might remain in the area.
	 */
	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}

/*
 * Find the vma of @mm that contains @addr, provided it is usable for KSM.
 *
 * Returns NULL if the mm is exiting (ksm_test_exit()), if no vma covers
 * @addr, if the vma is not VM_MERGEABLE, or if it has no anon_vma.
 */
static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
		unsigned long addr)
{
	struct vm_area_struct *vma;

	if (ksm_test_exit(mm))
		return NULL;

	vma = find_vma(mm, addr);
	/* Accept the vma only if it starts at or below addr and is mergeable */
	if (vma && vma->vm_start <= addr &&
	    (vma->vm_flags & VM_MERGEABLE) && vma->anon_vma)
		return vma;

	return NULL;
}

static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;

	/*
	 * It is not an accident that whenever we want to break COW
	 * to undo, we also need to drop a reference to the anon_vma.
	 */
	put_anon_vma(rmap_item->anon_vma);

	down_read(&mm->mmap_sem);
	vma = find_mergeable_vma(mm, addr);
	if (vma)
		break_ksm(vma, addr);
	up_read(&mm->mmap_sem);
}

/*
 * If @page belongs to a transparent compound page whose head is anonymous,
 * return that head page; otherwise return NULL.
 */
static struct page *page_trans_compound_anon(struct page *page)
{
	struct page *head;

	if (!PageTransCompound(page))
		return NULL;

	/*
	 * The head may actually be split and freed from under us,
	 * but that is okay here.
	 */
	head = compound_trans_head(page);
	return PageAnon(head) ? head : NULL;
}

/*
 * Look up the page currently mapped at @rmap_item's mm and address.
 *
 * Returns the page with a reference held (taken via FOLL_GET) if it is an
 * anonymous page, or part of an anonymous transparent compound page, in a
 * mergeable vma; the caches are flushed for it before returning.  Returns
 * NULL in every other case.  mmap_sem is held for reading for the duration
 * of the lookup.
 */
static struct page *get_mergeable_page(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;
	struct page *page = NULL;

	down_read(&mm->mmap_sem);

	vma = find_mergeable_vma(mm, addr);
	if (!vma)
		goto unlock;

	page = follow_page(vma, addr, FOLL_GET);
	if (IS_ERR_OR_NULL(page)) {
		page = NULL;
		goto unlock;
	}

	if (!PageAnon(page) && !page_trans_compound_anon(page)) {
		/* Not anonymous: drop the reference taken by follow_page() */
		put_page(page);
		page = NULL;
		goto unlock;
	}

	flush_anon_page(vma, page, addr);
	flush_dcache_page(page);

unlock:
	up_read(&mm->mmap_sem);
	return page;
}

static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;

	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		if (rmap_item->hlist.next)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;
		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;
		cond_resched();
	}

	rb_erase(&stable_node->node, &root_stable_tree);
	free_stable_node(stable_node);
}

/*