e='generator' content='cgit v1.2.2'/>
aboutsummaryrefslogblamecommitdiffstats
path: root/mm/page-writeback.c
blob: 0b19943ecf8bb7fc75e9c9842a0018bdbac3571c (plain) (tree)
1
2
3
4
5
6
7
8
9
  
                      

                                      
                                                                         



                                                                
                             













                               
                                         

                         
                       





                           
                              
                          

  




                                                                           


                                                                             
                                                                            

                                      
                                                              
 



                                          




                                                            
                                                                        
   
                                

  





                                                                            





                                                                 

                                                                  
                        

  





                                                                    
                                                  
   
                                                                   

  
                                                             
   
                                                                 






                                                                     

                                                                                







                                       
  















                                                                               
                                         
 








                                               




                                                                               



                                          
                                                      
   







                                                                      
                                                  



                             
                                                                     





                                                                      
                                                  



                             
                                                                       




                                           
                                                           
                                                  


                                       

                
                                                                     
                                                               







                                                           
                                                  

                             
                                                 

                
                                                                       


                                                               









                                                                        

                                                                 

 









                                                   
                                            



                                                    



















                                                                            

                                                               
 
                                  
 
                                                        
                                                  
                                                     
                                                      
 


                                                  







                                                       













                                                                 
                                                                            

                                    
                                      












                                                             
  

   




                                                                           
 
                                
                                         
                              








                                                      
                                  





                                                                       




                               
                                





                                                                        
                                  


                   
                                 

  















                                                                               






                                                                  
                                                  


                                                                   

                                                        












                                                                       






                                                                   


                        
                                                                          



                                                 


                                                           
    

                                                                         
 

                                 
                                                                      

                                









                                                                
 



                                                                               
 

                                       






                                                            

                  
                              






                                                                     
                                                                  

                                               
                                                            

                                                                 


                                                             
                                                      
         





                                                                               

                                                                            
   

                                                              
 

                                                


                                        
                                        
                                








                                                                 
                                            

                  

                                                                   




                                                                           

                                                                    
 

                                                                        
 








                                                                        

                                                





                                                                       


                                                                       
                   
                                                      
                                                   
                                                                       



















                                                                                
                 





                                                                        

                                                        







                                                                       

         


                                                                 

                                       
                       









                                                                                

                                                                    
                                                                
                                                  

 
                                                                
 
                                                   






                                                                   

                                                         
   
                                                                  
                                            
                                                                       









                                                                               

                                                                       
 

                                

                                    
                                                      





                                                                              
                          
                                            

                                        
                                                     

                                 
                                                        

                       
                         
 
                                                  
 
                                         
 

                                        

                     
                                                                                






                                                                       


                                                                        
                                                     







                                                                              


         

                                                  
                                                                 

  


                                                                  
                                                          
 
                                                          


                 
                                                    
 

                                  



                                                 






                                                  








                                                                               
                                                                




























                                                                               
                                  
 
                                                                    





                                                                  
                    

                                                                        
                                  
                           

 
                                                           




                                            















                                                              


                                     

                  
                                  
                                             


                                                     
                                                 

 
   
                                                                                                     

                                                                     

                                            
  
                                                                     





                                                                                
   


                                                                           
 

                     

                            
                                                   

                                               
                           
                   
                            
                                            
 

                                





                                                                             





                                                                         
                                                           

         
                           







                                                                             
 



                                                          




                                                                              
                           










                                                                           

                                        







                                                                               
                                                                 
                



                                                  










                                                                     
 

                                                           
                                                     
 
                                                            


















                                                                             
                                              
                                              














                                                                                
                         



                                       
                               
                  
                                


                                                                               
                           
                          
                                          

                           

                                                                          
                                                              

                                               
 

                   































                                                                                                            


                                  

                                                                               

                


                                       
                                                               

                                                       
                   



                                                                      

                                   



































                                                                          









                                                              













                                                                           











                                                                              
                                                          


                                                 



                                                                   


                                 
                                                   


                                                         
                                                                                
                                                            


                                                                       
                                                     


                                                                         
                 
                         
         
                 















                                                                                






                                                                        


                                                             
                                     




                                                                           




                                                       
         



                                            

















                                                                             
                               






                                   
















                                                                           

                                  
                               
























                                                                 


                                             









                                                                      
                                               
                                                                 

                                                               
                                 
                 
                         
         
                                        
 
                                       






                                                           
                                                                         

                                    
                                                              
                                                   
                          


                                                                         
                                                             
                                                                   

                                                        
                 
                                                                   


                                                   

                                                        








                                                           
                                                                         

                                    
                                                              
                                                 
                           


                                                                         
                                                           

                                                                   



                                                                     
                                                                   


                                                 

                                                        





                                       
                                                                     



                                                          
                
                        
                                                          
                          


                              
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
 * It should be somewhat larger than dirtied pages to ensure that reasonably
 * large amounts of I/O are submitted.
 */
static inline long sync_writeback_pages(unsigned long dirtied)
{
	if (dirtied < ratelimit_pages)
		dirtied = ratelimit_pages;

	return dirtied + dirtied / 2;
}

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */


/*
 * Scale the writeback cache size proportional to the relative writeout speeds.
 *
 * We do this by keeping a floating proportion between BDIs, based on page
 * writeback completions [end_page_writeback()]. Those devices that write out
 * pages fastest will get the larger share, while the slower will get a smaller
 * share.
 *
 * We use page writeout completions because we are interested in getting rid of
 * dirty pages. Having them written out is the primary goal.
 *
 * We introduce a concept of time, a period over which we measure these events,
 * because demand can/will vary over time. The length of this period itself is
 * measured in page writeback completions.
 *
 */
static struct prop_descriptor vm_completions;
static struct prop_descriptor vm_dirties;

/*
 * couple the period to the dirty_ratio:
 *
 *   period/2 ~ roundup_pow_of_two(dirty limit)
 */
static int calc_period_shift(void)
{
	unsigned long dirty_total;

	if (vm_dirty_bytes)
		dirty_total = vm_dirty_bytes / PAGE_SIZE;
	else
		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
				100;
	return 2 + ilog2(dirty_total - 1);
}

/*
 * update the period when the dirty threshold changes.
 */
static void update_completion_period(void)
{
	int shift = calc_period_shift();
	prop_change_shift(&vm_completions, shift);
	prop_change_shift(&vm_dirties, shift);
}

int dirty_background_ratio_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		update_completion_period();
		vm_dirty_bytes = 0;
	}
	return ret;
}


int dirty_bytes_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		update_completion_period();
		vm_dirty_ratio = 0;
	}
	return ret;
}

/*
 * Increment the BDI's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
	__prop_inc_percpu_max(&vm_completions, &bdi->completions,
			      bdi->max_prop_frac);
}

void bdi_writeout_inc(struct backing_dev_info *bdi)
{
	unsigned long flags;

	local_irq_save(flags);
	__bdi_writeout_inc(bdi);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

void task_dirty_inc(struct task_struct *tsk)
{
	prop_inc_single(&vm_dirties, &tsk->dirties);
}

/*
 * Obtain an accurate fraction of the BDI's portion.
 */
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
		long *numerator, long *denominator)
{
	if (bdi_cap_writeback_dirty(bdi)) {
		prop_fraction_percpu(&vm_completions, &bdi->completions,
				numerator, denominator);
	} else {
		*numerator = 0;
		*denominator = 1;
	}
}

/*
 * Clip the earned share of dirty pages to that which is actually available.
 * This avoids exceeding the total dirty_limit when the floating averages
 * fluctuate too quickly.
 */
static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
		unsigned long dirty, unsigned long *pbdi_dirty)
{
	unsigned long avail_dirty;

	avail_dirty = global_page_state(NR_FILE_DIRTY) +
		 global_page_state(NR_WRITEBACK) +
		 global_page_state(NR_UNSTABLE_NFS) +
		 global_page_state(NR_WRITEBACK_TEMP);

	if (avail_dirty < dirty)
		avail_dirty = dirty - avail_dirty;
	else
		avail_dirty = 0;

	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
		bdi_stat(bdi, BDI_WRITEBACK);

	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}

static inline void task_dirties_fraction(struct task_struct *tsk,
		long *numerator, long *denominator)
{
	prop_fraction_single(&vm_dirties, &tsk->dirties,
				numerator, denominator);
}

/*
 * scale the dirty limit
 *
 * task specific dirty limit:
 *
 *   dirty -= (dirty/8) * p_{t}
 */
static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
{
	long numerator, denominator;
	unsigned long dirty = *pdirty;
	u64 inv = dirty >> 3;

	task_dirties_fraction(tsk, &numerator, &denominator);
	inv *= numerator;
	do_div(inv, denominator);

	dirty -= inv;
	if (dirty < *pdirty/2)
		dirty = *pdirty/2;

	*pdirty = dirty;
}

/*
 *
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

/*
 * Work out the current dirty-memory clamping and background writeout
 * thresholds.
 *
 * The main aim here is to lower them aggressively if there is a lot of mapped
 * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 * pages.  It is better to clamp down on writers than to start swapping, and
 * performing lots of scanning.
 *
 * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 *
 * We don't permit the clamping level to fall below 5% - that is getting rather
 * excessive.
 *
 * We make sure that the background writeout level is below the adjusted
 * clamping level.
 */

static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z =
			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_page_state(z, NR_FREE_PAGES) +
		     zone_reclaimable_pages(z);
	}
	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

/**
 * determine_dirtyable_memory - amount of memory that may be used
 *
 * Returns the numebr of pages that can currently be freed and used
 * by the kernel for direct mappings.
 */
unsigned long determine_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

void
get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
{
	unsigned long background;
	unsigned long dirty;
	unsigned long available_memory = determine_dirtyable_memory();
	struct task_struct *tsk;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else {
		int dirty_ratio;

		dirty_ratio = vm_dirty_ratio;
		if (dirty_ratio < 5)
			dirty_ratio = 5;
		dirty = (dirty_ratio * available_memory) / 100;
	}

	if (dirty_background_bytes)
		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		background = (dirty_background_ratio * available_memory) / 100;

	if (background >= dirty)
		background = dirty / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;

	if (bdi) {
		u64 bdi_dirty;
		long numerator, denominator;

		/*
		 * Calculate this BDI's share of the dirty ratio.
		 */
		bdi_writeout_fraction(bdi, &numerator, &denominator);

		bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
		bdi_dirty *= numerator;
		do_div(bdi_dirty, denominator);
		bdi_dirty += (dirty * bdi->min_ratio) / 100;
		if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
			bdi_dirty = dirty * bdi->max_ratio / 100;

		*pbdi_dirty = bdi_dirty;
		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
		task_dirty_limit(current, pbdi_dirty);
	}
}

/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				unsigned long write_chunk)
{
	long nr_reclaimable, bdi_nr_reclaimable;
	long nr_writeback, bdi_nr_writeback;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long pages_written = 0;
	unsigned long pause = 1;

	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		struct writeback_control wbc = {
			.bdi		= bdi,
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= write_chunk,
			.range_cyclic	= 1,
		};

		get_dirty_limits(&background_thresh, &dirty_thresh,
				&bdi_thresh, bdi);

		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);

		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the bdi limits are ramping up.