author     Ingo Molnar <mingo@elte.hu>  2009-04-24 04:11:18 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-04-24 04:11:23 -0400
commit     416dfdcdb894432547ead4fcb9fa6a36b396059e (patch)
tree       8033fdda07397a59c5fa98c88927040906ce6c1a /mm
parent     56449f437add737a1e5e1cb7e00f63ac8ead1938 (diff)
parent     091069740304c979f957ceacec39c461d0192158 (diff)
Merge commit 'v2.6.30-rc3' into tracing/hw-branch-tracing
Conflicts:
	arch/x86/kernel/ptrace.c

Merge reason: fix the conflict above, and also pick up the CONFIG_BROKEN
dependency change from upstream so that we can remove it here.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |  2
-rw-r--r--  mm/filemap.c     |  5
-rw-r--r--  mm/memcontrol.c  |  2
-rw-r--r--  mm/mmap.c        |  2
-rw-r--r--  mm/pdflush.c     | 47
-rw-r--r--  mm/shmem.c       | 27
-rw-r--r--  mm/util.c        | 16
-rw-r--r--  mm/vmscan.c      | 17
8 files changed, 91 insertions(+), 27 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index b53427ad30a3..57971d2ab848 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -213,6 +213,8 @@ config UNEVICTABLE_LRU
 	  will use one page flag and increase the code size a little,
 	  say Y unless you know what you are doing.
 
+	  See Documentation/vm/unevictable-lru.txt for more information.
+
 config HAVE_MLOCK
 	bool
 	default y if MMU=y
diff --git a/mm/filemap.c b/mm/filemap.c
index 2e2d38ebda4b..379ff0bcbf6e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -441,6 +441,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	}
 	return err;
 }
+EXPORT_SYMBOL(filemap_write_and_wait_range);
 
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
@@ -567,8 +568,8 @@ EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page - Page defining the wait queue of interest
- * @waiter - Waiter to add to the queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
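
Note (illustrative, not part of this diff): the EXPORT_SYMBOL above makes
filemap_write_and_wait_range() callable from modular filesystems, which can
use it to flush and wait on a byte range of an inode's pagecache. A minimal
sketch of a caller, using the 2.6.30-era fsync prototype; myfs_fsync is a
hypothetical function:

#include <linux/fs.h>
#include <linux/kernel.h>	/* LLONG_MAX */

/* Write back the whole file and wait for completion; lstart/lend
 * form an inclusive byte range. Returns 0 or a negative errno. */
static int myfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	return filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX);
}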
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fc6d6c48238..e44fb0fbb80e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
 
 	while (1) {
 		int ret;
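
Note (illustrative, not part of this diff): the added !mem test looks
redundant given the unlikely(!mem) check just above, but it costs nothing in
production builds, because VM_BUG_ON() compiles away unless CONFIG_DEBUG_VM
is set - roughly, per the era's include/linux/mmdebug.h:

#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
#else
#define VM_BUG_ON(cond) do { } while (0)
#endif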
diff --git a/mm/mmap.c b/mm/mmap.c
index 4a3841186c11..3303d1ba8e87 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1575,7 +1575,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
-	if (security_vm_enough_memory(grow))
+	if (security_vm_enough_memory_mm(mm, grow))
 		return -ENOMEM;
 
 	/* Ok, everything looks good - let it rip */
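
Note (illustrative, not part of this diff): acct_stack_growth() can run on
behalf of another task - for instance when get_user_pages() grows a traced
process's stack - so current->mm, which the old security_vm_enough_memory()
charged implicitly, may not be the address space actually being grown. A
sketch of the distinction; charge_stack_growth() is a hypothetical helper:

/* vma->vm_mm is the mm whose stack is growing; current->mm may
 * belong to a different task (ptrace, core dump). Overcommit
 * accounting must charge the former, hence the _mm variant. */
static int charge_stack_growth(struct vm_area_struct *vma, long grow)
{
	return security_vm_enough_memory_mm(vma->vm_mm, grow);
}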
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 118905e3d788..f2caf96993f8 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -58,6 +58,14 @@ static DEFINE_SPINLOCK(pdflush_lock);
 int nr_pdflush_threads = 0;
 
 /*
+ * The max/min number of pdflush threads. R/W by sysctl at
+ * /proc/sys/vm/nr_pdflush_threads_max/min
+ */
+int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS;
+int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS;
+
+
+/*
  * The time at which the pdflush thread pool last went empty
  */
 static unsigned long last_empty_jifs;
@@ -68,7 +76,7 @@ static unsigned long last_empty_jifs;
  * Thread pool management algorithm:
  *
  * - The minimum and maximum number of pdflush instances are bound
- *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
+ *   by nr_pdflush_threads_min and nr_pdflush_threads_max.
  *
  * - If there have been no idle pdflush instances for 1 second, create
  *   a new one.
@@ -98,7 +106,6 @@ static int __pdflush(struct pdflush_work *my_work)
 	INIT_LIST_HEAD(&my_work->list);
 
 	spin_lock_irq(&pdflush_lock);
-	nr_pdflush_threads++;
 	for ( ; ; ) {
 		struct pdflush_work *pdf;
 
@@ -126,20 +133,25 @@ static int __pdflush(struct pdflush_work *my_work)
 
 		(*my_work->fn)(my_work->arg0);
 
+		spin_lock_irq(&pdflush_lock);
+
 		/*
 		 * Thread creation: For how long have there been zero
 		 * available threads?
+		 *
+		 * To throttle creation, we reset last_empty_jifs.
 		 */
 		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
-			/* unlocked list_empty() test is OK here */
-			if (list_empty(&pdflush_list)) {
-				/* unlocked test is OK here */
-				if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
-					start_one_pdflush_thread();
+			if (list_empty(&pdflush_list) &&
+			    nr_pdflush_threads < nr_pdflush_threads_max) {
+				last_empty_jifs = jiffies;
+				nr_pdflush_threads++;
+				spin_unlock_irq(&pdflush_lock);
+				start_one_pdflush_thread();
+				spin_lock_irq(&pdflush_lock);
 			}
 		}
 
-		spin_lock_irq(&pdflush_lock);
 		my_work->fn = NULL;
 
 		/*
@@ -148,7 +160,7 @@ static int __pdflush(struct pdflush_work *my_work)
 		 */
 		if (list_empty(&pdflush_list))
 			continue;
-		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
+		if (nr_pdflush_threads <= nr_pdflush_threads_min)
 			continue;
 		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
 		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
@@ -236,14 +248,27 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
 
 static void start_one_pdflush_thread(void)
 {
-	kthread_run(pdflush, NULL, "pdflush");
+	struct task_struct *k;
+
+	k = kthread_run(pdflush, NULL, "pdflush");
+	if (unlikely(IS_ERR(k))) {
+		spin_lock_irq(&pdflush_lock);
+		nr_pdflush_threads--;
+		spin_unlock_irq(&pdflush_lock);
+	}
 }
 
 static int __init pdflush_init(void)
 {
 	int i;
 
-	for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
+	/*
+	 * Pre-set nr_pdflush_threads...  If we fail to create,
+	 * the count will be decremented.
+	 */
+	nr_pdflush_threads = nr_pdflush_threads_min;
+
+	for (i = 0; i < nr_pdflush_threads_min; i++)
 		start_one_pdflush_thread();
 	return 0;
 }
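
Note (illustrative, not part of this diff): nr_pdflush_threads_min/max are
meant to be tuned through /proc/sys/vm/; the matching ctl_table entries live
in kernel/sysctl.c, outside this mm-only diffstat. A sketch of what such a
2.6.30-era entry looks like (field values are an assumption, not copied from
the commit):

	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "nr_pdflush_threads_min",
		.data		= &nr_pdflush_threads_min,
		.maxlen		= sizeof(nr_pdflush_threads_min),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &one,	/* floor: at least one thread */
		.extra2		= &nr_pdflush_threads_max,
	},

Also worth noting in the hunks above: nr_pdflush_threads is now incremented
under pdflush_lock before kthread_run() and decremented in
start_one_pdflush_thread() if creation fails, closing the race between the
counter and thread startup that the old unlocked nr_pdflush_threads++
allowed.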
diff --git a/mm/shmem.c b/mm/shmem.c
index d94d2e9146bc..f9cb20ebb990 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
+#include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
-#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
@@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt;
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+/*
+ * The maximum size of a shmem/tmpfs file is limited by the maximum size of
+ * its triple-indirect swap vector - see illustration at shmem_swp_entry().
+ *
+ * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
+ * but one eighth of that on a 64-bit kernel.  With 8kB page size, maximum
+ * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
+ * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
+ *
+ * We use / and * instead of shifts in the definitions below, so that the swap
+ * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
+ */
 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
+#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
-#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
+#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
 
+#define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
+#define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
+
+#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
@@ -2581,7 +2596,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 #define shmem_get_inode(sb, mode, dev, flags)	ramfs_get_inode(sb, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
-#define SHMEM_MAX_BYTES				LLONG_MAX
+#define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
 
 #endif /* CONFIG_SHMEM */
 
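
Note (illustrative, not part of this diff): the size figures in the new
comment are easy to verify. With 4kB pages, ENTRIES_PER_PAGE is
4096/sizeof(unsigned long): 1024 on 32-bit, 512 on 64-bit. A small userspace
sketch of the SHMSWP_MAX_INDEX arithmetic (SHMEM_NR_DIRECT is ignored as
negligible):

#include <stdio.h>

/* Triple-indirect capacity: (EPP*EPP/2) * (EPP+1) pages of 4kB each. */
static unsigned long long swp_max_bytes(unsigned long long epp)
{
	return (epp * epp / 2) * (epp + 1) * 4096ULL;
}

int main(void)
{
	printf("32-bit (EPP=1024): %.2f TB\n", swp_max_bytes(1024) / 1e12);
	printf("64-bit (EPP=512):  %.2f TB\n", swp_max_bytes(512) / 1e12);
	return 0;
}

This prints roughly 2.20 TB and 0.28 TB: just over 2TB on a 32-bit kernel
and one eighth of that on 64-bit, as the comment states. SHMEM_MAX_BYTES
then clamps the result to MAX_LFS_FILESIZE.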
diff --git a/mm/util.c b/mm/util.c
index 2599e83eea17..55bef160b9f1 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -223,6 +223,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 }
 #endif
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
 {
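
Note (illustrative, not part of this diff): a sketch of the calling
convention the new kernel-doc describes; pin_user_buffer() is a hypothetical
function:

#include <linux/mm.h>

static int pin_user_buffer(unsigned long uaddr, int nr_pages,
			   struct page **pages)
{
	int i, pinned;

	/* write=1: the pinned pages will be written to */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned < 0)
		return pinned;	/* no pages pinned: -errno */

	/* ... use pages[0..pinned-1]; may be fewer than requested ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);	/* drop each pinned reference */
	return pinned;
}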
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 39fdfb14eeaa..eac9577941f9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -63,6 +63,9 @@ struct scan_control {
 	/* Can mapped pages be reclaimed? */
 	int may_unmap;
 
+	/* Can pages be swapped as part of reclaim? */
+	int may_swap;
+
 	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
 	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
 	 * In this context, it doesn't matter that we scan the
@@ -1380,7 +1383,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (nr_swap_pages <= 0) {
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
 		percent[0] = 0;
 		percent[1] = 100;
 		return;
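
Note (illustrative, not part of this diff): in get_scan_ratio(), percent[0]
is the share of scanning aimed at the anon LRU and percent[1] the share
aimed at the file LRU. Anonymous pages can only be reclaimed by swapping
them out, so the widened test skips them not just when swap space is gone
but also when the caller forbids swap via the new may_swap flag:

	/* file-only reclaim when anon pages cannot be swapped */
	if (!sc->may_swap || nr_swap_pages <= 0) {
		percent[0] = 0;		/* anon LRU: skip */
		percent[1] = 100;	/* file LRU: all effort */
		return;
	}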
@@ -1697,6 +1700,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swappiness = vm_swappiness,
 		.order = order,
 		.mem_cgroup = NULL,
@@ -1717,6 +1721,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
+		.may_swap = !noswap,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
@@ -1726,9 +1731,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	};
 	struct zonelist *zonelist;
 
-	if (noswap)
-		sc.may_unmap = 0;
-
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1767,6 +1769,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
@@ -2088,13 +2091,13 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
 				if (nr_reclaimed >= nr_pages) {
-					sc->nr_reclaimed = nr_reclaimed;
+					sc->nr_reclaimed += nr_reclaimed;
 					return;
 				}
 			}
 		}
 	}
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 }
 
 /*
@@ -2115,6 +2118,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		.may_unmap = 0,
 		.may_writepage = 1,
 		.isolate_pages = isolate_pages_global,
+		.nr_reclaimed = 0,
 	};
 
 	current->reclaim_state = &reclaim_state;
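
Note (illustrative, not part of this diff): shrink_all_memory(), the
hibernation path, calls shrink_all_zones() repeatedly with the same
scan_control as it ratchets down priority; that is why the two assignments
in the previous hunk become +=, and why .nr_reclaimed is zeroed here.
Schematic of the call pattern, with argument lists simplified from the real
code:

	/* accumulate across passes; '=' would discard earlier progress */
	sc.nr_reclaimed = 0;
	for (prio = DEF_PRIORITY; prio >= 0; prio--) {
		shrink_all_zones(nr_pages - sc.nr_reclaimed, prio, &sc);
		if (sc.nr_reclaimed >= nr_pages)
			break;
	}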
@@ -2297,6 +2301,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+		.may_swap = 1,
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,