litmus-rt.git - The LITMUS^RT kernel.

diff options

author	Dmitry Kasatkin <dmitry.kasatkin@intel.com>	2012-09-12 13:51:32 -0400
committer	Mimi Zohar <zohar@linux.vnet.ibm.com>	2012-09-13 14:23:57 -0400
commit	45e2472e67bf66f794d507b52e82af92e0614e49 (patch)
tree	4b3ba557d4f9da9bca14ce85bee965e4a9fcd6ac /lib/mpi/mpi-inline.h
parent	d9d300cdb6f233c4c591348919c758062198a4f4 (diff)

ima: generic IMA action flag handling

Make the IMA action flag handling generic in order to support additional new actions, without requiring changes to the base implementation. New actions, like audit logging, will only need to modify the define statements. Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com> Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>

Diffstat (limited to 'lib/mpi/mpi-inline.h')

0 files changed, 0 insertions, 0 deletions

/* * mm/page-writeback.c * * Copyright (C) 2002, Linus Torvalds. * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * Contains functions related to writing back dirty pages at the * address_space level. * * 10Apr2002 akpm@zip.com.au * Initial version */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/init.h> #include <linux/backing-dev.h> #include <linux/task_io_accounting_ops.h> #include <linux/blkdev.h> #include <linux/mpage.h> #include <linux/rmap.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/smp.h> #include <linux/sysctl.h> #include <linux/cpu.h> #include <linux/syscalls.h> #include <linux/buffer_head.h> #include <linux/pagevec.h> /* * The maximum number of pages to writeout in a single bdflush/kupdate * operation. We do this so we don't hold I_SYNC against an inode for * enormous amounts of time, which would block a userspace task which has * been forced to throttle against that inode. Also, the code reevaluates * the dirty each time it has written this many pages. */ #define MAX_WRITEBACK_PAGES 1024 /* * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited * will look to see if it needs to force writeback or throttling. */ static long ratelimit_pages = 32; /* * When balance_dirty_pages decides that the caller needs to perform some * non-background writeback, this is how many pages it will attempt to write. * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably * large amounts of I/O are submitted. */ static inline long sync_writeback_pages(void) { return ratelimit_pages + ratelimit_pages / 2; } /* The following parameters are exported via /proc/sys/vm */ /* * Start background writeback (via pdflush) at this percentage */ int dirty_background_ratio = 5; /* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true */ int vm_highmem_is_dirtyable; /* * The generator of dirty data starts writeback at this percentage */ int vm_dirty_ratio = 10; /* * The interval between `kupdate'-style writebacks, in jiffies */ int dirty_writeback_interval = 5 * HZ; /* * The longest number of jiffies for which data is allowed to remain dirty */ int dirty_expire_interval = 30 * HZ; /* * Flag that makes the machine dump writes/reads and block dirtyings. */ int block_dump; /* * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: * a full sync is triggered after this time elapses without any disk activity. */ int laptop_mode; EXPORT_SYMBOL(laptop_mode); /* End of sysctl-exported parameters */ static void background_writeout(unsigned long _min_pages); /* * Scale the writeback cache size proportional to the relative writeout speeds. * * We do this by keeping a floating proportion between BDIs, based on page * writeback completions [end_page_writeback()]. Those devices that write out * pages fastest will get the larger share, while the slower will get a smaller * share. * * We use page writeout completions because we are interested in getting rid of * dirty pages. Having them written out is the primary goal. * * We introduce a concept of time, a period over which we measure these events, * because demand can/will vary over time. The length of this period itself is * measured in page writeback completions. * */ static struct prop_descriptor vm_completions; static struct prop_descriptor vm_dirties; static unsigned long determine_dirtyable_memory(void); /* * couple the period to the dirty_ratio: * * period/2 ~ roundup_pow_of_two(dirty limit) */ static int calc_period_shift(void) { unsigned long dirty_total; dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; return 2 + ilog2(dirty_total - 1); } /* * update the period when the dirty ratio changes. */ int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { int shift = calc_period_shift(); prop_change_shift(&vm_completions, shift); prop_change_shift(&vm_dirties, shift); } return ret; } /* * Increment the BDI's writeout completion count and the global writeout * completion count. Called from test_clear_page_writeback(). */ static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) { __prop_inc_percpu_max(&vm_completions, &bdi->completions, bdi->max_prop_frac); } void bdi_writeout_inc(struct backing_dev_info *bdi) { unsigned long flags; local_irq_save(flags); __bdi_writeout_inc(bdi); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(bdi_writeout_inc); static inline void task_dirty_inc(struct task_struct *tsk) { prop_inc_single(&vm_dirties, &tsk->dirties); } /* * Obtain an accurate fraction of the BDI's portion. */ static void bdi_writeout_fraction(struct backing_dev_info *bdi, long *numerator, long *denominator) { if (bdi_cap_writeback_dirty(bdi)) { prop_fraction_percpu(&vm_completions, &bdi->completions, numerator, denominator); } else { *numerator = 0; *denominator = 1; } } /* * Clip the earned share of dirty pages to that which is actually available. * This avoids exceeding the total dirty_limit when the floating averages * fluctuate too quickly. */ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty) { long avail_dirty; avail_dirty = dirty - (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_WRITEBACK) + global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK_TEMP)); if (avail_dirty < 0) avail_dirty = 0; avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) + bdi_stat(bdi, BDI_WRITEBACK); *pbdi_dirty = min(*pbdi_dirty, avail_dirty); } static inline void task_dirties_fraction(struct task_struct *tsk, long *numerator, long *denominator) { prop_fraction_single(&vm_dirties, &tsk->dirties, numerator, denominator); } /* * scale the dirty limit * * task specific dirty limit: * * dirty -= (dirty/8) * p_{t} */ static void task_dirty_limit(struct task_struct *tsk, long *pdirty) { long numerator, denominator; long dirty = *pdirty; u64 inv = dirty >> 3; task_dirties_fraction(tsk, &numerator, &denominator); inv *= numerator; do_div(inv, denominator); dirty -= inv; if (dirty < *pdirty/2) dirty = *pdirty/2; *pdirty = dirty; } /* * */ static DEFINE_SPINLOCK(bdi_lock); static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0; unsigned long flags; spin_lock_irqsave(&bdi_lock, flags); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { min_ratio -= bdi->min_ratio; if (bdi_min_ratio + min_ratio < 100) { bdi_min_ratio += min_ratio; bdi->min_ratio += min_ratio; } else { ret = -EINVAL; } } spin_unlock_irqrestore(&bdi_lock, flags); return ret; } int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) { unsigned long flags; int ret = 0; if (max_ratio > 100) return -EINVAL; spin_lock_irqsave(&bdi_lock, flags); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; } spin_unlock_irqrestore(&bdi_lock, flags); return ret; } EXPORT_SYMBOL(bdi_set_max_ratio); /* * Work out the current dirty-memory clamping and background writeout * thresholds. * * The main aim here is to lower them aggressively if there is a lot of mapped * memory around. To avoid stressing page reclaim with lots of unreclaimable * pages. It is better to clamp down on writers than to start swapping, and * performing lots of scanning. * * We only allow 1/2 of the currently-unmapped memory to be dirtied. * * We don't permit the clamping level to fall below 5% - that is getting rather * excessive. * * We make sure that the background writeout level is below the adjusted * clamping level. */ static unsigned long highmem_dirtyable_memory(unsigned long total) { #ifdef CONFIG_HIGHMEM int node; unsigned long x = 0; for_each_node_state(node, N_HIGH_MEMORY) { struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; x += zone_page_state(z, NR_FREE_PAGES) + zone_page_state(z, NR_INACTIVE) + zone_page_state(z, NR_ACTIVE); } /* * Make sure that the number of highmem pages is never larger * than the number of the total dirtyable memory. This can only * occur in very strange VM situations but we want to make sure * that this does not occur. */ return min(x, total); #else return 0; #endif } static unsigned long determine_dirtyable_memory(void) { unsigned long x; x = global_page_state(NR_FREE_PAGES) + global_page_state(NR_INACTIVE) + global_page_state(NR_ACTIVE); if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); return x + 1; /* Ensure that we never return 0 */ } void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, struct backing_dev_info *bdi) { int background_ratio; /* Percentages */ int dirty_ratio; long background; long dirty; unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; dirty_ratio = vm_dirty_ratio; if (dirty_ratio < 5) dirty_ratio = 5; background_ratio = dirty_background_ratio; if (background_ratio >= dirty_ratio) background_ratio = dirty_ratio / 2; background = (background_ratio * available_memory) / 100; dirty = (dirty_ratio * available_memory) / 100; tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { background += background / 4; dirty += dirty / 4; } *pbackground = background; *pdirty = dirty; if (bdi) { u64 bdi_dirty; long numerator, denominator; /* * Calculate this BDI's share of the dirty ratio. */ bdi_writeout_fraction(bdi, &numerator, &denominator); bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100; bdi_dirty *= numerator; do_div(bdi_dirty, denominator); bdi_dirty += (dirty * bdi->min_ratio) / 100; if (bdi_dirty > (dirty * bdi->max_ratio) / 100) bdi_dirty = dirty * bdi->max_ratio / 100; *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); task_dirty_limit(current, pbdi_dirty); } } /* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. * If we're over `background_thresh' then pdflush is woken to perform some * writeout. */ static void balance_dirty_pages(struct address_space *mapping) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; long background_thresh; long dirty_thresh; long bdi_thresh; unsigned long pages_written = 0; unsigned long write_chunk = sync_writeback_pages(); struct backing_dev_info *bdi = mapping->backing_dev_info; for (;;) { struct writeback_control wbc = { .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, .range_cyclic = 1, }; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); nr_reclaimable = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); nr_writeback = global_page_state(NR_WRITEBACK); bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the bdi limits are ramping up. */ if (nr_reclaimable + nr_writeback < (background_thresh + dirty_thresh) / 2) break; if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. */ if (bdi_nr_reclaimable) { writeback_inodes(&wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); } /* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ if (bdi_thresh < 2*bdi_stat_error(bdi)) { bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); } else if (bdi_nr_reclaimable) { bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); } if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; if (pages_written >= write_chunk) break; /* We've done our duty */ congestion_wait(WRITE, HZ/10); } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && bdi->dirty_exceeded) bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* pdflush is already working this queue */ /* * In laptop mode, we wait until hitting the higher threshold before * starting background writeout, and then write out all the way down * to the lower threshold. So slow writers cause minimal disk activity. * * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || (!laptop_mode && (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) > background_thresh))) pdflush_operation(background_writeout, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) { if (set_page_dirty(page) || page_mkwrite) { struct address_space *mapping = page_mapping(page); if (mapping) balance_dirty_pages_ratelimited(mapping); } } /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * On really big machines, get_writeback_state is expensive, so try to avoid * calling it too often (ratelimiting). But once we're over the dirty memory * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { static DEFINE_PER_CPU(unsigned long, ratelimits) = 0; unsigned long ratelimit; unsigned long *p; ratelimit = ratelimit_pages; if (mapping->backing_dev_info->dirty_exceeded) ratelimit = 8; /* * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */ preempt_disable(); p = &__get_cpu_var(ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { *p = 0; preempt_enable(); balance_dirty_pages(mapping); return; } preempt_enable(); } EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr); void throttle_vm_writeout(gfp_t gfp_mask) { long background_thresh; long dirty_thresh; for ( ; ; ) { get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); /* * Boost the allowable dirty threshold a bit for page * allocators so they don't get DoS'ed by heavy writers */ dirty_thresh += dirty_thresh / 10; /* wheeee... */ if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; congestion_wait(WRITE, HZ/10); /* * The caller might hold locks which can prevent IO completion * or progress in the filesystem. So we cannot just sit here * waiting for IO to complete. */ if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) break; } } /* * writeback at least _min_pages, and keep writing until the amount of dirty * memory is less than the background threshold, or until we're all clean. */ static void background_writeout(unsigned long _min_pages) { long min_pages = _min_pages; struct writeback_control wbc = { .bdi = NULL, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = 0, .nonblocking = 1, .range_cyclic = 1, }; for ( ; ; ) { long background_thresh; long dirty_thresh; get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); if (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) < background_thresh && min_pages <= 0) break; wbc.more_io = 0; wbc.encountered_congestion = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; writeback_inodes(&wbc); min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ if (wbc.encountered_congestion || wbc.more_io) congestion_wait(WRITE, HZ/10); else break; } } } /* * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. Returns 0 if a pdflush thread was dispatched. Returns * -1 if all pdflush threads were busy. */ int wakeup_pdflush(long nr_pages) { if (nr_pages == 0) nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); return pdflush_operation(background_writeout, nr_pages); } static void wb_timer_fn(unsigned long unused); static void laptop_timer_fn(unsigned long unused); static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0); static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); /* * Periodic writeback of "old" data. * * Define "old": the first time one of an inode's pages is dirtied, we mark the * dirtying-time in the inode's address_space. So this periodic writeback code * just walks the superblock inode list, writing back any inodes which are * older than a specific point in time. * * Try to run once per dirty_writeback_interval. But if a writeback event * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * * older_than_this takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static void wb_kupdate(unsigned long arg) { unsigned long oldest_jif; unsigned long start_jif; unsigned long next_jif; long nr_to_write; struct writeback_control wbc = { .bdi = NULL, .sync_mode = WB_SYNC_NONE, .older_than_this = &oldest_jif, .nr_to_write = 0, .nonblocking = 1, .for_kupdate = 1, .range_cyclic = 1, }; sync_supers(); oldest_jif = jiffies - dirty_expire_interval; start_jif = jiffies; next_jif = start_jif + dirty_writeback_interval; nr_to_write = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); while (nr_to_write > 0) {


context:
space:
mode: