author     KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>   2010-03-05 16:41:40 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>       2010-03-06 14:26:24 -0500
commit     34e55232e59f7b19050267a05ff1226e5cd122a5 (patch)
tree       6b94e776e87d2a2fe1ceca7c5606901575323900
parent     d559db086ff5be9bcc259e5aa50bf3d881eaf1d1 (diff)
mm: avoid false sharing of mm_counter
Per-mm statistics are, by their nature, objects shared among all threads of a
process, and updating them can be a cache-miss point in the page fault path.

This patch adds a per-thread cache for the mm counters: RSS deltas are
accumulated in a structure inside task_struct and synchronized back into the
mm's counters at certain events. In this patch, the event is the number of
calls to handle_mm_fault: each thread's cached values are folded into the mm
once every 64 calls.

A rough estimate with a small benchmark on two parallel threads shows:

[before]
4.5 cache-misses/fault
[after]
4.0 cache-misses/fault

(As the number of threads grows, the most contended object becomes mmap_sem
anyway.)
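For illustration, here is a minimal userspace analogue of the scheme (a
sketch, not kernel code): each thread batches counter updates privately and
folds them into the shared atomic counter every 64 events, so the shared
cache line is touched far less often. All names (account_page, flush_cache,
EVENTS_THRESH) are hypothetical stand-ins for the patch's
add_mm_counter_fast()/check_sync_rss_stat()/TASK_RSS_EVENTS_THRESH. Numbers
like the cache-miss/fault figures above could presumably be gathered with
something like "perf stat -e cache-misses,page-faults" on a perf-enabled
kernel.

/*
 * Userspace sketch of split counter caching -- NOT the kernel code,
 * just the idea behind it. Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define EVENTS_THRESH 64
#define NEVENTS 1000000

static atomic_long shared_rss;          /* plays the role of the mm counter */
static _Thread_local long cached_rss;   /* plays the role of the task cache */
static _Thread_local int events;

static void flush_cache(void)           /* analogue of __sync_task_rss_stat() */
{
	if (cached_rss) {
		atomic_fetch_add(&shared_rss, cached_rss);
		cached_rss = 0;
	}
	events = 0;
}

static void account_page(long delta)    /* analogue of add_mm_counter_fast() */
{
	cached_rss += delta;
	if (++events > EVENTS_THRESH)   /* analogue of check_sync_rss_stat() */
		flush_cache();
}

static void *worker(void *arg)
{
	for (int i = 0; i < NEVENTS; i++)
		account_page(1);
	flush_cache();                  /* analogue of sync_mm_rss() at exit */
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, NULL);
	pthread_create(&t2, NULL, worker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	/* exact total: every thread flushes its residue before exiting */
	printf("shared_rss = %ld\n", atomic_load(&shared_rss));
	return 0;
}

The design trade-off is the same as in the patch: the shared counter is only
approximately up to date between flushes, in exchange for far fewer writes to
the shared cache line.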
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   Documentation/filesystems/proc.txt |  6
-rw-r--r--   fs/exec.c                          |  1
-rw-r--r--   include/linux/mm.h                 |  8
-rw-r--r--   include/linux/mm_types.h           |  6
-rw-r--r--   include/linux/sched.h              |  4
-rw-r--r--   kernel/exit.c                      |  3
-rw-r--r--   mm/memory.c                        | 94
7 files changed, 107 insertions(+), 15 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0d07513a67a6..e418f3d8f427 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -188,6 +188,12 @@ memory usage. Its seven fields are explained in Table 1-3. The stat file
 contains details information about the process itself. Its fields are
 explained in Table 1-4.
 
+(for SMP CONFIG users)
+To keep accounting scalable, RSS-related information is handled in an
+asynchronous manner and the value may not be very precise. To see a
+precise snapshot of a moment, you can look at the /proc/<pid>/smaps
+file and scan the page table. It is slow but very precise.
+
 Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
 ..............................................................................
  Field    Content
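The precise path this new paragraph points at is easy to script. Below is a
small sketch (not part of the patch; the program and its output format are
illustrative) that sums the Rss: fields of /proc/<pid>/smaps, which forces
the kernel to walk the page tables:

/*
 * Precise RSS via smaps: sum all "Rss: <n> kB" lines.
 * Usage: ./rss [pid]   (defaults to "self")
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	long kb, total = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%s/smaps",
		 argc > 1 ? argv[1] : "self");
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (sscanf(line, "Rss: %ld kB", &kb) == 1)
			total += kb;
	fclose(f);
	printf("precise RSS: %ld kB\n", total);
	return 0;
}

Unlike the cached statm/status values, this reflects the page tables at the
moment of the read, at the cost of a full scan.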
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -718,6 +718,7 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
+	sync_mm_rss(tsk, old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2124cdb2d1d0..8e580c07d171 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -873,7 +873,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 /*
  * per-process(per-mm_struct) statistics.
  */
-#if USE_SPLIT_PTLOCKS
+#if defined(SPLIT_RSS_COUNTING)
 /*
  * The mm counters are not protected by its page_table_lock,
  * so must be incremented atomically.
@@ -883,10 +883,7 @@ static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
 	atomic_long_set(&mm->rss_stat.count[member], value);
 }
 
-static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
-	return (unsigned long)atomic_long_read(&mm->rss_stat.count[member]);
-}
+unsigned long get_mm_counter(struct mm_struct *mm, int member);
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
@@ -974,6 +971,7 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 	*maxrss = hiwater_rss;
 }
 
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm);
 
 /*
  * A callback you can register to apply pressure to ageable caches.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e1ca64be6678..21861239ab0c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -202,9 +202,15 @@ enum {
 };
 
 #if USE_SPLIT_PTLOCKS
+#define SPLIT_RSS_COUNTING
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
 };
+/* per-thread cached information */
+struct task_rss_stat {
+	int events;	/* for synchronization threshold */
+	int count[NR_MM_COUNTERS];
+};
 #else	/* !USE_SPLIT_PTLOCKS */
 struct mm_rss_stat {
 	unsigned long count[NR_MM_COUNTERS];
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbeafa49a53b..46c6f8d5dc06 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1220,7 +1220,9 @@ struct task_struct {
 	struct plist_node pushable_tasks;
 
 	struct mm_struct *mm, *active_mm;
-
+#if defined(SPLIT_RSS_COUNTING)
+	struct task_rss_stat	rss_stat;
+#endif
 /* task state */
 	int exit_state;
 	int exit_code, exit_signal;
diff --git a/kernel/exit.c b/kernel/exit.c
index 45ed043b8bf5..10d3c5d5ae44 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -952,7 +952,8 @@ NORET_TYPE void do_exit(long code)
 			preempt_count());
 
 	acct_update_integrals(tsk);
-
+	/* sync mm's RSS info before statistics gathering */
+	sync_mm_rss(tsk, tsk->mm);
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/mm/memory.c b/mm/memory.c
index c57678478801..a4597614f18d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -122,6 +122,79 @@ static int __init init_zero_pfn(void)
 core_initcall(init_zero_pfn);
 
 
+#if defined(SPLIT_RSS_COUNTING)
+
+void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		if (task->rss_stat.count[i]) {
+			add_mm_counter(mm, i, task->rss_stat.count[i]);
+			task->rss_stat.count[i] = 0;
+		}
+	}
+	task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+	struct task_struct *task = current;
+
+	if (likely(task->mm == mm))
+		task->rss_stat.count[member] += val;
+	else
+		add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH	(64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+	if (unlikely(task != current))
+		return;
+	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+		__sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	long val = 0;
+
+	/*
+	 * Don't use task->mm here, to avoid having to use task_get_mm().
+	 * The caller must guarantee that task->mm is valid.
+	 */
+	val = atomic_long_read(&mm->rss_stat.count[member]);
+	/*
+	 * The counter is updated asynchronously and may temporarily go
+	 * negative; that is never a value users expect, so clamp it.
+	 */
+	if (val < 0)
+		return 0;
+	return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+	__sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+}
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none. Usually (but
@@ -386,6 +459,8 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
 {
 	int i;
 
+	if (current->mm == mm)
+		sync_mm_rss(current, mm);
 	for (i = 0; i < NR_MM_COUNTERS; i++)
 		if (rss[i])
 			add_mm_counter(mm, i, rss[i]);
@@ -1539,7 +1614,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, MM_FILEPAGES);
+	inc_mm_counter_fast(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2175,11 +2250,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, MM_FILEPAGES);
-				inc_mm_counter(mm, MM_ANONPAGES);
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2616,7 +2691,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, MM_ANONPAGES);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2700,7 +2775,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, MM_ANONPAGES);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2854,10 +2929,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (flags & FAULT_FLAG_WRITE)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (anon) {
-		inc_mm_counter(mm, MM_ANONPAGES);
+		inc_mm_counter_fast(mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, address);
 	} else {
-		inc_mm_counter(mm, MM_FILEPAGES);
+		inc_mm_counter_fast(mm, MM_FILEPAGES);
 		page_add_file_rmap(page);
 		if (flags & FAULT_FLAG_WRITE) {
 			dirty_page = page;
@@ -3035,6 +3110,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	count_vm_event(PGFAULT);
 
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 