aboutsummaryrefslogtreecommitdiffstats
path: root/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'Makefile')
0 files changed, 0 insertions, 0 deletions
ark'>wip-kshark The LITMUS^RT kernel.Bjoern Brandenburg
aboutsummaryrefslogblamecommitdiffstats
path: root/mm/ksm.c
blob: 7ee101eaacdfe9eb82061585bb820df243a3650b (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
  




                                                                          
                                        



                        
                    

                                                                   


                        

                     
                       










                           
                         
                               
                       
                      
                            
                          
                      
                       
 
                         
                     
 







                                             





































                                                                              


                                                                            





                                                                
                                                                       




                                                 
                                    






                                                       
                                                                   






                                                                             
                                     



                            

                                                     

                                                                               
                                                       

                                                                               

                    






                                                                       
                                
                           


                  


   
                                                                
                                                                  
                                                                          
                                                                           


                                                                            


                                                                     

                  
                                    





                                                                        

                                                                             
                                                           
               




                                                                          



                                                                     

                                                                    

                                        



                                                              
 


                                                                      

                                                           








                                                       
                                            


                                            
                                      
 
                                                               
                                       
 





                                                                  
                                                   
                                                   

                                                    
                                                    
 
                  

                                                     
                               

                                  
                                 
      
 


                         


                                            














                                                                             



                                                           

                                                   
                               


                 


                                              







                                            
                                              





                                                     





                                                                   



                                                              
                         



                                                    









                                                                    











                                                            

                                                        

                             
                                                                            

                                    
 





                                                            
                         
                                                                   

 
  












                                                                             









                                                                         
                                                                    

                          
                    


                               
                                                                         
                                         






                                                                          
                                                                                                




























                                                                             

 













                                                                      
                                                  
 

                                                

                                   



                                                                     
                                          
 
                                 


                                           


                               


                                                               
                                                        
                  

                                                                     
                   





                                    







                                                                   

                                           


                                                
                                 
                         
                                                               









                                                 







                                                                             
                                                                   

 


                                                                         
 
                                                                     



                                            
                                                  



                                                



                                                
                                                                    








                                                                 
                                                              









                                                                         

                                                                      
                                                                               


                               
                           
 

                                                                       
      
                                            







                                                                       
                                                         
                           























                                                                              
                                                           


                               
 
                      
                                
                                                                   




                                          
                    
 
      






                                                                            
                                                 
                           



                                                  
  




                                                                          

                                                
                                  
 
                                              
                                                       

                                 
 
                                             

                                  
 


                                             
                                           
 
                                                  
                                                
 
                                                        

                                  
                                                                       
                                                                   


                                                                        

                                                                           
                                
                         
                                                  
                                                                            
                                     
                                                
         
    


                                                                       
                                                               
                                                                    
 


                                                         
                                                      




                                          
                                                                          




                                                                          




                                                                         
   

                                                                    

                           
                    
 
                                                                   

                                              





                                                   

 



                                                   












                                                                        




                                                                              
                             
                
                  


                                                                              
















                                                                             
                                      


                    
                                                     









                                                                             





                                                                         


                   
                                                  



                                   


                                    
                                                               

                                                                         
 

                                                                              


                                                               

                                              

                                                                              

                                                                            

                                           
                 
 
                                                                         

                                            
                                                                    
                                                                         
                                        
                                                 










                                                                

         

                                                                           
                           



                               
                                    
                                        
                                      
                   
 
                         
 


                                           
                                       
                                                   
                            







                                                               

                                   
                                              

                             
















                                                                            

                                                               




                                              
                                        




                                                                      

                                                           
                            
 
                                                   




                                                               
                                                                               






                                                                               
                                                                 



                                                                              
                                                                            
                                                          

                                        


                                                          






                                                         

                                                                    





                                                     


                                                     



                                            

                                                                      

                                          



                           
                          

                                                               
 
                                              


                            

                                    
                         
 



                                                                      


                                                        
                            

         
                        
                                             

                                                    
                                                 
                                                                            
 
                               

                                       
                       


                                    

                                                                    



                   


























                                                                               

                                                                 

                                                              

                                                                    



                                                                       
                                                                       



                                  


                                                                     

                                            


                                                                            
                            

                         






                                                                   
                                
                         





                                                                       












                                                                         
 
                                                           
                                       

                                          




                                                                      
 
                          




                   

                                                                         

                                                                       
   

                                                                            
 
                                             


                                   

                                 
                         

                                                       

                         
                                                      


                         


                                                                         
                                                                         

                                            
    
                               



                   


                                                                     

                                                                          
  
                                                                         

                                                                    



                                                                            
 
                
 
                                                                
                   
                                                                
                                                                         
                  

                                                                      
                   
                        
                                             
         
                                 


  
                                                              



                                                                     
                                                                            

                  
                                                         
 
                
                             



                                        
 


                                                             
                               
                            

         
                                              
                                      
      
                             
                      
 
                      
                                       

                        
                               
                                                                       
                                                             

                                    
 
                                                    
                                    
 
                              
                            
                                               
                                 
                                                








                                                                         
                                        
                                                       













                                                                               
                 

         





                                                    
                                                






                                              
                                                                            

                               
                                                   




                                                        


  
                                                                        

                        

                                                                        
   
                                                                 
 

                           
                             
                             
                                      
                                        
 

                                  

                                      
 
                      
                                       

                        
                               
                                                                       
                                                             

                                    
 

                                                     















                                                                              


                                          
 
                                             
                                 
                                                 
                                        
                                                      
                                                  
 
                           


  

                                                           










                                                                          



                                                                          
 

                             
                                      


                                              
                                        
                             


                                                 
                                       

                        
                               
                                                                        
                                                               
                                              


                                    
                                                                 
                   

                                            


                                    
                                                    


                              
                                            

                                               
                                            
                                                








                                                                                
                        
                                                



                                              
                                            
                                                            
                                      
                                                    
                                                
 
                             








                                                                      
                                                               
 
                                      
                                          
                                                               
 



                                    


  



                                                                              





                                                                              
                                         
                                      
                                        
                           


                              




                                                                                
                                                                            






                                                                        

                                                                           
                                         






                                                              
                    
                                                                         




                                                                
                                         
                                                                               
                                           
                 
                                



                       



                                                                           






                                                  

                                                                         
                             


                                                                           
                            



                                                                           
                                         




                                                                                
                                           
 





                                                                              
                                           

                                                          

                         



                                                                    
                                                                         



                                                               

                                       
                                                             
                                         

                                              
                                                  
                                                      







                                                

                                                  









                                                                    
                





                                             











                                                                             




















                                                                            
                                                           
                                                          




                                                                               





                                                                             

                                     
                                                      



                                 





                                                     







                                                         

                                              
                                                                             




                                                              

                                                              


                                                                              
                                                                              
                                                

                                                                              





                                                                      
                                        




                                                      

                                     
                                                      
         



                                                                       
                                                             

                                    






                                                                            



                                                                            
                   
                                      
                                         

                                              

                                                        




                                              
         

                                                                  
                                


                                 










                                                                   
                                             
 
                                                             



                                                           
                                                    



                               




                                                                              

                                         
                        
                                  

                                        
                                              
                                       
                                      
                                                              

                                                

                                
                                        


                                                                              
                                                             
                                                                            




                 



                                                                       
                







                                                                            
                                                           

                                                                            




                                       




                                                              







                                                                            




                                                                 









                                           



                                  


                               


                                                            


                                             

                                                                         

                                                                            



                                                                           
           



                                                                             

                                      
                                              
                                  



                                                        


                 
                                     
 
                                
                             
 
          





                                                                              
           
 

                                    
                                                     
                                          
                                                 





                                                              
         

                                      




                                                        

                                          
         

 
                                                      

                                                                          
                                                        

                              












                                                                               





                                                                      
                                            

         


                        
                                                                   

                                        



                                    
                                             




                                                                            
                                                




                                             
                                                                     
                                                                
                                            

                                           
                                             

                                                                        
                                        











                                                                              


                                                                                

                                                                      
                                                
                                                               

                                         



                                                               
                 
                                               






                                
                       



                                                                 


                                                                      


                                                
                                                                                   
                                                         







                                                                             



                             
                              




                                                             
                                                  



                                              

                                                          
 
                                        
                                      
                             
                
 

                                                        
                              

                                                                               





                                                                             
                                                                        


                                                     
                 
         






                                                                         





                                                               



                               




                                                                               
                   


                                                





                                                                              


                                                                             
                   

                                                                    


                                 

                                              
                                                


                                                              



                         



                                      

                                    




                                                                   


















                                                                           
                                        





















                                                                         
                                           











                                                                          
                                              







                                                                           
                                        







                                                                        

                                                                     


                                      
                               

                                
                                              
                                                 
                                                                  
                                                   




                                                       









                                                        




















                                                                       
                               
                                             
                                                                  
                                     








                                                                              

                                                                              










                                                                               
                                                      

                                                                 







                                        









                                                                         
                                                        


                           






























                                                                          



                                        

                                 


                                  


                                      






                                                
                         









                                       

                                                                
                                                         
                                          
                              

         
                   

                                                           
                                                       
                                         
                              
         


                                                                         
                         
 
                              
                                                           

                                                          

                 
         


                        
 
                          
/*
 * Memory merging support.
 *
 * This code enables dynamic sharing of identical pages found in different
 * memory areas, even if they are not shared by fork()
 *
 * Copyright (C) 2008-2009 Red Hat, Inc.
 * Authors:
 *	Izik Eidus
 *	Andrea Arcangeli
 *	Chris Wright
 *	Hugh Dickins
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hashtable.h>
#include <linux/freezer.h>
#include <linux/oom.h>
#include <linux/numa.h>

#include <asm/tlbflush.h>
#include "internal.h"

/*
 * NUMA helpers: with CONFIG_NUMA, NUMA(x) evaluates to x and DO_NUMA(x)
 * executes x as a statement; without it, NUMA(x) is the constant 0 and
 * DO_NUMA(x) is an empty statement, so x is never evaluated.
 */
#ifdef CONFIG_NUMA
#define NUMA(x)		(x)
#define DO_NUMA(x)	do { (x); } while (0)
#else
#define NUMA(x)		(0)
#define DO_NUMA(x)	do { } while (0)
#endif

/*
 * A few notes about the KSM scanning process,
 * to make it easier to understand the data structures below:
 *
 * In order to reduce excessive scanning, KSM sorts the memory pages by their
 * contents into a data structure that holds pointers to the pages' locations.
 *
 * Since the contents of the pages may change at any moment, KSM cannot just
 * insert the pages into a normal sorted tree and expect it to find anything.
 * Therefore KSM uses two data structures - the stable and the unstable tree.
 *
 * The stable tree holds pointers to all the merged pages (ksm pages), sorted
 * by their contents.  Because each such page is write-protected, searching on
 * this tree is fully assured to be working (except when pages are unmapped),
 * and therefore this tree is called the stable tree.
 *
 * In addition to the stable tree, KSM uses a second data structure called the
 * unstable tree: this tree holds pointers to pages which have been found to
 * be "unchanged for a period of time".  The unstable tree sorts these pages
 * by their contents, but since they are not write-protected, KSM cannot rely
 * upon the unstable tree to work correctly - the unstable tree is liable to
 * be corrupted as its contents are modified, and so it is called unstable.
 *
 * KSM solves this problem by several techniques:
 *
 * 1) The unstable tree is flushed every time KSM completes scanning all
 *    memory areas, and then the tree is rebuilt again from the beginning.
 * 2) KSM will only insert into the unstable tree, pages whose hash value
 *    has not changed since the previous scan of all memory areas.
 * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
 *    colors of the nodes and not on their contents, assuring that even when
 *    the tree gets "corrupted" it won't get out of balance, so scanning time
 *    remains the same (also, searching and inserting nodes in an rbtree uses
 *    the same algorithm, so we have no overhead when we flush and rebuild).
 * 4) KSM never flushes the stable tree, which means that even if it were to
 *    take 10 attempts to find a page in the unstable tree, once it is found,
 *    it is secured in the stable tree.  (When we scan a new page, we first
 *    compare it against the stable tree, and then against the unstable tree.)
 *
 * If the merge_across_nodes tunable is unset, then KSM maintains multiple
 * stable trees and multiple unstable trees: one of each for each NUMA node.
 */

/**
 * struct mm_slot - ksm information per mm that is being scanned
 * @link: link to the mm_slots hash list (mm_slots_hash, keyed by mm pointer)
 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
 * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
 * @mm: the mm that this information is valid for
 *
 * Slots come zero-filled from mm_slot_cache via alloc_mm_slot(); @mm is
 * filled in when the slot is hashed by insert_to_mm_slots_hash().
 */
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};

/**
 * struct ksm_scan - cursor for scanning
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 * @rmap_list: link to the next rmap to be scanned in the rmap_list
 * @seqnr: count of completed full scans (needed when removing unstable node)
 *
 * There is only the one ksm_scan instance of this cursor structure: the
 * file-scope variable of the same name, whose @mm_slot initially points at
 * the list head ksm_mm_head.
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};

/**
 * struct stable_node - node of the stable rbtree
 * @node: rb node of this ksm page in the stable tree
 * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
 * @list: linked into migrate_nodes, pending placement in the proper node tree
 * @hlist: hlist head of rmap_items using this ksm page
 * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
 * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
 *
 * @node shares its storage with the @head/@list pair, so only one of the two
 * views is meaningful at a time.  @nid exists only when CONFIG_NUMA is set.
 * Nodes come from stable_node_cache via alloc_stable_node(), which uses
 * kmem_cache_alloc() and therefore does not zero the new node.
 */
struct stable_node {
	union {
		struct rb_node node;	/* when node of stable tree */
		struct {		/* when listed for migration */
			struct list_head *head;
			struct list_head list;
		};
	};
	struct hlist_head hlist;
	unsigned long kpfn;
#ifdef CONFIG_NUMA
	int nid;
#endif
};

/**
 * struct rmap_item - reverse mapping item for virtual addresses
 * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
 * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
 * @nid: NUMA node id of unstable tree in which linked (may not match page)
 * @mm: the memory structure this rmap_item is pointing into
 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
 * @oldchecksum: previous checksum of the page at that virtual address
 * @node: rb node of this rmap_item in the unstable tree
 * @head: pointer to stable_node heading this list in the stable tree
 * @hlist: link into hlist of rmap_items hanging off that stable_node
 *
 * @anon_vma and @nid share storage (and @nid exists only with CONFIG_NUMA);
 * likewise @node shares storage with the @head/@hlist pair.  The low bits of
 * @address hold SEQNR_MASK, UNSTABLE_FLAG and STABLE_FLAG.  Items are handed
 * out zero-filled by alloc_rmap_item().
 */
struct rmap_item {
	struct rmap_item *rmap_list;
	union {
		struct anon_vma *anon_vma;	/* when stable */
#ifdef CONFIG_NUMA
		int nid;		/* when node of unstable tree */
#endif
	};
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};

/*
 * Flag bits kept in the low bits of rmap_item.address.  Taken together
 * they cover bits 0-9 (0x3ff) of that field.
 */
#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
#define STABLE_FLAG	0x200	/* is listed from the stable tree */

/*
 * The stable and unstable tree heads.
 *
 * one_stable_tree and one_unstable_tree are single-element arrays holding
 * the default roots; root_stable_tree and root_unstable_tree start out
 * pointing at them.
 * NOTE(review): presumably the root pointers are redirected to per-node
 * arrays when merge_across_nodes is cleared - confirm against the code that
 * handles that tunable, which is not in this part of the file.
 */
static struct rb_root one_stable_tree[1] = { RB_ROOT };
static struct rb_root one_unstable_tree[1] = { RB_ROOT };
static struct rb_root *root_stable_tree = one_stable_tree;
static struct rb_root *root_unstable_tree = one_unstable_tree;

/* Recently migrated nodes of stable tree, pending proper placement */
static LIST_HEAD(migrate_nodes);

/* Hash table of mm_slots, looked up by mm pointer in get_mm_slot() */
#define MM_SLOTS_HASH_BITS 10
static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);

/* Head of the mm_slots list; its mm_list starts out as an empty list */
static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
/* The single scan cursor; it starts out positioned on the list head */
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

/* Slab caches for the three ksm object types, created by ksm_slab_init() */
static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;

/* The number of page slots additionally sharing those nodes */
static unsigned long ksm_pages_sharing;

/* The number of nodes in the unstable tree */
static unsigned long ksm_pages_unshared;

/*
 * The number of rmap_items in use: to calculate pages_volatile.
 * Incremented by alloc_rmap_item() and decremented by free_rmap_item().
 */
static unsigned long ksm_rmap_items;

/* Number of pages ksmd should scan in one batch (default 100) */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds ksmd should sleep between batches (default 20) */
static unsigned int ksm_thread_sleep_millisecs = 20;

/*
 * With CONFIG_NUMA these are variables, both starting at 1; without it
 * they are compile-time constants with the same values.
 */
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
static int ksm_nr_node_ids = 1;
#else
#define ksm_merge_across_nodes	1U
#define ksm_nr_node_ids		1
#endif

/*
 * Values held in ksm_run, which starts out as KSM_RUN_STOP.
 * NOTE(review): KSM_RUN_OFFLINE is a separate bit (4), so it looks as if it
 * is OR-ed onto the other values rather than replacing them - confirm
 * against the users of ksm_run.
 */
#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
#define KSM_RUN_OFFLINE	4
static unsigned long ksm_run = KSM_RUN_STOP;
/* Forward declaration; the definition is not in this part of the file */
static void wait_while_offlining(void);

/*
 * Synchronization objects.  NOTE(review): what each one protects is
 * determined by their users, which are not in this part of the file.
 */
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);

/*
 * Create a slab cache for "struct <__struct>": the cache is named
 * "ksm_<__struct>", sized and aligned to the struct, created with __flags,
 * and given NULL as the final kmem_cache_create() argument.
 */
#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)

/*
 * Create the three ksm slab caches: rmap_item_cache, stable_node_cache and
 * mm_slot_cache, in that order.
 *
 * Returns 0 once all three exist.  If any creation fails, the caches that
 * were already created are destroyed again and -ENOMEM is returned.
 */
static int __init ksm_slab_init(void)
{
	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
	if (!rmap_item_cache)
		return -ENOMEM;		/* nothing to unwind yet */

	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
	if (!stable_node_cache)
		goto err_rmap_item;

	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
	if (!mm_slot_cache)
		goto err_stable_node;

	return 0;

	/* Unwind in reverse order of creation */
err_stable_node:
	kmem_cache_destroy(stable_node_cache);
err_rmap_item:
	kmem_cache_destroy(rmap_item_cache);
	return -ENOMEM;
}

/*
 * Destroy all three ksm slab caches.  mm_slot_cache is then reset to NULL,
 * which is the condition alloc_mm_slot() tests before allocating.
 */
static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
	mm_slot_cache = NULL;
}

static inline struct rmap_item *alloc_rmap_item(void)
{
	struct rmap_item *rmap_item;

	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
	if (rmap_item)
		ksm_rmap_items++;
	return rmap_item;
}

/*
 * Return an rmap_item to rmap_item_cache and drop it from the
 * ksm_rmap_items count; the counterpart of alloc_rmap_item().
 */
static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	ksm_rmap_items--;
	rmap_item->mm = NULL;	/* debug safety */
	kmem_cache_free(rmap_item_cache, rmap_item);
}

/*
 * Allocate a stable_node from stable_node_cache.  The memory is not
 * zeroed (kmem_cache_alloc, not kmem_cache_zalloc), so the caller must
 * initialize every field it relies on.  Returns whatever the allocator
 * returns.
 */
static inline struct stable_node *alloc_stable_node(void)
{
	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}

/* Return a stable_node to stable_node_cache. */
static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}

/*
 * Allocate a zero-filled mm_slot from mm_slot_cache.
 *
 * Returns NULL without attempting an allocation when mm_slot_cache is NULL,
 * i.e. when initialization failed.
 */
static inline struct mm_slot *alloc_mm_slot(void)
{
	return mm_slot_cache ?
		kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL) : NULL;
}

/* Return an mm_slot to mm_slot_cache. */
static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}

static struct mm_slot *get_mm_slot(struct mm_struct *mm)
{
	struct mm_slot *slot;

	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
		if (slot->mm == mm)
			return slot;

	return NULL;
}

/*
 * Bind @mm_slot to @mm and add it to mm_slots_hash, keyed by the mm
 * pointer value - the same key get_mm_slot() uses for lookup.
 */
static void insert_to_mm_slots_hash(struct mm_struct *mm,
				    struct mm_slot *mm_slot)
{
	mm_slot->mm = mm;
	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
}

/*
 * Once an mm has gone through ksm_exit(), neither ksmd nor
 * unmerge_and_remove_all_rmap_items() may touch its page tables; ksm_exit()
 * briefly takes mmap_sem, when necessary, to serialize against them.  No
 * dedicated flag is set by ksm_exit(): mm_users dropping to zero is the
 * signal to back out.  ksm_test_exit() performs that check and is used
 * throughout - in some places for correctness, in others merely to avoid
 * unnecessary work.
 *
 * Returns true when @mm's mm_users count is zero.
 */
static inline bool ksm_test_exit(struct mm_struct *mm)
{
	return !atomic_read(&mm->mm_users);
}

/*
 * break_ksm() breaks COW on a ksm page.  It is a cut-down version of
 *
 *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
 *		put_page(page);
 *
 * that takes great care to touch only a ksm page, in a VM_MERGEABLE vma,
 * because the application may have unmapped and remapped mm,addr in the
 * meantime.  A ksm page can indeed turn up elsewhere: in a VM_PFNMAP mmap
 * of /dev/mem or /dev/kmem, where we would not want to touch it.
 *
 * Returns -ENOMEM if the final fault reported VM_FAULT_OOM, otherwise 0.
 */
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	for (;;) {
		cond_resched();
		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
		if (IS_ERR_OR_NULL(page))
			break;

		/* A page that is not (or no longer) ksm needs no fault */
		if (!PageKsm(page))
			ret = VM_FAULT_WRITE;
		else
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
					      FAULT_FLAG_WRITE);
		put_page(page);

		if (ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS |
			   VM_FAULT_SIGSEGV | VM_FAULT_OOM))
			break;
	}
	/*
	 * The loop is needed because handle_mm_fault() may back out if there
	 * is any difficulty, e.g. if the pte accessed bit gets updated
	 * concurrently.
	 *
	 * VM_FAULT_WRITE is the result we are hoping for: it says that COW
	 * has been broken, even if the vma does not permit VM_WRITE; note
	 * though that a concurrent fault might break PageKsm for us.
	 *
	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
	 * backing file, which also invalidates anonymous pages: that is
	 * fine, since the truncation will have unmapped the PageKsm for us.
	 *
	 * VM_FAULT_SIGSEGV likewise ends the loop.
	 *
	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
	 * current task has TIF_MEMDIE set, and will be OOM killed on return
	 * to user; and ksmd, having no mm, would never be chosen for that.
	 *
	 * But if the mm is in a limited mem_cgroup, the fault may fail with
	 * VM_FAULT_OOM even when the current task is not TIF_MEMDIE; and
	 * even ksmd can fail this way - though it is usually breaking ksm
	 * just to undo a merge it made a moment before, so unlikely to oom.
	 *
	 * That is a pity: we might then have more kernel pages allocated
	 * than we are counting as nodes in the stable tree; but ksm_do_scan
	 * will retry break_cow on each pass, so should recover the page in
	 * due course.  The important thing is not to let VM_MERGEABLE be
	 * cleared while any such pages might remain in the area.
	 */
	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}

/*
 * Find the vma of @mm that covers @addr, provided it is usable for merging.
 *
 * Returns NULL if the mm is exiting (ksm_test_exit()), if no vma contains
 * @addr, if the vma is not VM_MERGEABLE, or if it has no anon_vma;
 * otherwise returns the vma.
 */
static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
		unsigned long addr)
{
	struct vm_area_struct *vma;

	if (ksm_test_exit(mm))
		return NULL;

	vma = find_vma(mm, addr);
	/* The vm_start test rejects a vma that begins above addr */
	if (vma && vma->vm_start <= addr &&
	    (vma->vm_flags & VM_MERGEABLE) && vma->anon_vma)
		return vma;

	return NULL;
}

static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;