-rw-r--r--  Documentation/kdump/kdump.txt                     |  20
-rw-r--r--  arch/arm/Kconfig                                  |   2
-rw-r--r--  arch/x86/kernel/acpi/cstate.c                     |   3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c        |  10
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c         |  15
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c  |  12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c       |   3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c             |   3
-rw-r--r--  arch/x86/kernel/ldt.c                             |   6
-rw-r--r--  arch/x86/kernel/microcode.c                       |  17
-rw-r--r--  arch/x86/kernel/reboot.c                          |  11
-rw-r--r--  arch/x86/kernel/setup_percpu.c                    |  21
-rw-r--r--  arch/x86/kvm/Kconfig                              |   1
-rw-r--r--  arch/x86/mm/gup.c                                 |   9
-rw-r--r--  drivers/acpi/processor_throttling.c               |  11
-rw-r--r--  drivers/char/pcmcia/ipwireless/hardware.c         |   2
-rw-r--r--  drivers/firmware/dcdbas.c                         |   3
-rw-r--r--  drivers/misc/sgi-xp/xpc_main.c                    |   3
-rw-r--r--  drivers/scsi/qla2xxx/qla_attr.c                   |   1
-rw-r--r--  drivers/spi/mpc52xx_psc_spi.c                     |  22
-rw-r--r--  drivers/spi/spi_s3c24xx.c                         |   1
-rw-r--r--  drivers/video/sh7760fb.c                          |   1
-rw-r--r--  fs/bio-integrity.c                                |   1
-rw-r--r--  fs/buffer.c                                       |  46
-rw-r--r--  fs/ecryptfs/crypto.c                              |  30
-rw-r--r--  fs/exec.c                                         |   1
-rw-r--r--  fs/ext2/inode.c                                   |   1
-rw-r--r--  fs/ext3/inode.c                                   |  67
-rw-r--r--  fs/ext4/inode.c                                   |  92
-rw-r--r--  include/asm-arm/arch-s3c2410/spi.h                |   1
-rw-r--r--  include/asm-generic/gpio.h                        |   1
-rw-r--r--  include/asm-generic/pgtable-nopmd.h               |   6
-rw-r--r--  include/linux/buffer_head.h                       |   2
-rw-r--r--  include/linux/cpumask.h                           |  63
-rw-r--r--  include/linux/fs.h                                |  44
-rw-r--r--  include/linux/mm.h                                |   3
-rw-r--r--  include/linux/mm_types.h                          |   4
-rw-r--r--  include/linux/mmu_notifier.h                      | 279
-rw-r--r--  include/linux/pagemap.h                           |   1
-rw-r--r--  include/linux/rculist.h                           |  28
-rw-r--r--  include/linux/rmap.h                              |   8
-rw-r--r--  kernel/cpu.c                                      |  25
-rw-r--r--  kernel/fork.c                                     |   3
-rw-r--r--  kernel/time/tick-common.c                         |   8
-rw-r--r--  kernel/trace/trace_sysprof.c                      |   4
-rw-r--r--  lib/ratelimit.c                                   |   3
-rw-r--r--  lib/smp_processor_id.c                            |   5
-rw-r--r--  mm/Kconfig                                        |   3
-rw-r--r--  mm/Makefile                                       |   1
-rw-r--r--  mm/filemap.c                                      |  14
-rw-r--r--  mm/filemap_xip.c                                  |   3
-rw-r--r--  mm/fremap.c                                       |   3
-rw-r--r--  mm/hugetlb.c                                      |   4
-rw-r--r--  mm/memory.c                                       |  35
-rw-r--r--  mm/mmap.c                                         | 160
-rw-r--r--  mm/mmu_notifier.c                                 | 277
-rw-r--r--  mm/mprotect.c                                     |   3
-rw-r--r--  mm/mremap.c                                       |   6
-rw-r--r--  mm/rmap.c                                         |  13
-rw-r--r--  mm/shmem.c                                        |   3
-rw-r--r--  net/sunrpc/svc.c                                  |   3
61 files changed, 1132 insertions(+), 299 deletions(-)
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 9691c7f5166..0705040531a 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -65,26 +65,26 @@ Install kexec-tools
 
 2) Download the kexec-tools user-space package from the following URL:
 
-http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools-testing.tar.gz
+http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools.tar.gz
 
-This is a symlink to the latest version, which at the time of writing is
-20061214, the only release of kexec-tools-testing so far. As other versions
-are released, the older ones will remain available at
-http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/
+This is a symlink to the latest version.
 
-Note: Latest kexec-tools-testing git tree is available at
+The latest kexec-tools git tree is available at:
 
-git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools-testing.git
+git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools.git
 or
-http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools-testing.git;a=summary
+http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/README.html
 
 3) Unpack the tarball with the tar command, as follows:
 
-	tar xvpzf kexec-tools-testing.tar.gz
+	tar xvpzf kexec-tools.tar.gz
 
 4) Change to the kexec-tools directory, as follows:
 
-	cd kexec-tools-testing-VERSION
+	cd kexec-tools-VERSION
 
 5) Configure the package, as follows:
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 652cd32a09c..257033c691f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -235,6 +235,7 @@ config ARCH_VERSATILE
 config ARCH_AT91
 	bool "Atmel AT91"
 	select GENERIC_GPIO
+	select HAVE_CLK
 	help
 	  This enables support for systems based on the Atmel AT91RM9200,
 	  AT91SAM9 and AT91CAP9 processors.
@@ -268,7 +269,6 @@ config ARCH_EP93XX
 	select ARM_VIC
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select HAVE_CLK
 	select ARCH_REQUIRE_GPIOLIB
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 9220cf46aa1..c2502eb9aa8 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(new_mask, cpu);
 	int retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 
 	/* Make sure we are running on right CPU */
 	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, new_mask);
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		return -1;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ff2fff56f0a..dd097b83583 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd)
 static void drv_write(struct drv_cmd *cmd)
 {
 	cpumask_t saved_mask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_declare(cpu_mask);
 	unsigned int i;
 
 	for_each_cpu_mask_nr(i, cmd->mask) {
-		cpumask_of_cpu_ptr_next(cpu_mask, i);
-		set_cpus_allowed_ptr(current, cpu_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		do_drv_write(cmd);
 	}
 
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu)
 	} aperf_cur, mperf_cur;
 
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	unsigned int perf_percent;
 	unsigned int retval;
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (get_cpu() != cpu) {
 		/* We were not able to run on requested processor */
 		put_cpu();
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
 	unsigned int freq;
 	unsigned int cached_freq;
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	}
 
 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(cpu_mask), data);
+	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 53c7b693697..c45ca6d4dce 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -479,12 +479,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 static int check_supported_cpu(unsigned int cpu)
 {
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	u32 eax, ebx, ecx, edx;
 	unsigned int rc = 0;
 
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -1017,7 +1016,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1032,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1107,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr_declare(newmask);
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1159,8 +1156,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_next(newmask, pol->cpu);
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1182,7 +1178,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	set_cpus_allowed_ptr(current, &oldmask);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = *newmask;
+		pol->cpus = cpumask_of_cpu(pol->cpu);
 	else
 		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
 	data->available_cores = &(pol->cpus);
@@ -1248,7 +1244,6 @@ static unsigned int powernowk8_get (unsigned int cpu)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask = current->cpus_allowed;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int khz = 0;
 	unsigned int first;
 
@@ -1258,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX
 		       "limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index ca2ac13b7af..15e13c01cc3 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
 	unsigned l, h;
 	unsigned clock_freq;
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(new_mask, cpu);
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu)
 		return 0;
 
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy,
 	 * Best effort undo..
 	 */
 
-	if (!cpus_empty(*covered_cpus)) {
-		cpumask_of_cpu_ptr_declare(new_mask);
-
+	if (!cpus_empty(*covered_cpus))
 		for_each_cpu_mask_nr(j, *covered_cpus) {
-			cpumask_of_cpu_ptr_next(new_mask, j);
-			set_cpus_allowed_ptr(current, new_mask);
+			set_cpus_allowed_ptr(current,
+					     &cpumask_of_cpu(j));
 			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
 		}
-	}
 
 	tmp = freqs.new;
 	freqs.new = freqs.old;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2f3728dc24f..191f7263c61 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	cpumask_of_cpu_ptr(newmask, cpu);
-	return _speedstep_get(newmask);
+	return _speedstep_get(&cpumask_of_cpu(cpu));
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 650d40f7912..6b0a10b002f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	unsigned long		j;
 	int			retval;
 	cpumask_t		oldmask;
-	cpumask_of_cpu_ptr(newmask, cpu);
 
 	if (num_cache_leaves == 0)
 		return -ENOENT;
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		return -ENOMEM;
 
 	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, newmask);
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		goto out;
 
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3..b68e21f06f4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 
 	if (reload) {
 #ifdef CONFIG_SMP
-		cpumask_of_cpu_ptr_declare(mask);
-
 		preempt_disable();
 		load_LDT(pc);
-		cpumask_of_cpu_ptr_next(mask, smp_processor_id());
-		if (!cpus_equal(current->mm->cpu_vm_mask, *mask))
+		if (!cpus_equal(current->mm->cpu_vm_mask,
+				cpumask_of_cpu(smp_processor_id())))
 			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #else
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590..652fa5c38eb 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,7 +388,6 @@ static int do_microcode_update (void)
 	void *new_mc = NULL;
 	int cpu;
 	cpumask_t old;
-	cpumask_of_cpu_ptr_declare(newmask);
 
 	old = current->cpus_allowed;
 
@@ -405,8 +404,7 @@ static int do_microcode_update (void)
 
 		if (!uci->valid)
 			continue;
-		cpumask_of_cpu_ptr_next(newmask, cpu);
-		set_cpus_allowed_ptr(current, newmask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 		error = get_maching_microcode(new_mc, cpu);
 		if (error < 0)
 			goto out;
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	cpumask_t old;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int val[2];
 	int err = 0;
 
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu)
 		return 0;
 
 	old = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 	/* Check if the microcode we have in memory matches the CPU */
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu)
 static void microcode_init_cpu(int cpu, int resume)
 {
 	cpumask_t old;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	old = current->cpus_allowed;
 
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
 	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev,
 	if (end == buf)
 		return -EINVAL;
 	if (val == 1) {
-		cpumask_t old;
-		cpumask_of_cpu_ptr(newmask, cpu);
-
-		old = current->cpus_allowed;
+		cpumask_t old = current->cpus_allowed;
 
 		get_online_cpus();
-		set_cpus_allowed_ptr(current, newmask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 		mutex_lock(&microcode_mutex);
 		if (uci->valid)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 06a9f643817..724adfc63cb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -414,25 +414,20 @@ void native_machine_shutdown(void)
 
 	/* The boot cpu is always logical cpu 0 */
 	int reboot_cpu_id = 0;
-	cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
-		cpu_online(reboot_cpu)) {
+		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
-		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
-	}
 #endif
 
 	/* Make certain the cpu I'm about to reboot on is online */
-	if (!cpu_online(reboot_cpu_id)) {
+	if (!cpu_online(reboot_cpu_id))
 		reboot_cpu_id = smp_processor_id();
-		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
-	}
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c00..76e305e064f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
 #endif
 }
 
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-cpumask_t *cpumask_of_cpu_map __read_mostly;
-EXPORT_SYMBOL(cpumask_of_cpu_map);
-
-/* requires nr_cpu_ids to be initialized */
-static void __init setup_cpumask_of_cpu(void)
-{
-	int i;
-
-	/* alloc_bootmem zeroes memory */
-	cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
-	for (i = 0; i < nr_cpu_ids; i++)
-		cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
 #ifdef CONFIG_X86_32
 /*
  * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
 
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
-
-	/* Setup cpumask_of_cpu map */
-	setup_cpumask_of_cpu();
 }
 
 #endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fabc5f3..ce3251ce550 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 3085f25b435..007bb06c750 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -223,14 +223,17 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
 	struct mm_struct *mm = current->mm;
-	unsigned long end = start + (nr_pages << PAGE_SHIFT);
-	unsigned long addr = start;
+	unsigned long addr, len, end;
 	unsigned long next;
 	pgd_t *pgdp;
 	int nr = 0;
 
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
 	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					start, nr_pages*PAGE_SIZE)))
+					start, len)))
 		goto slow_irqon;
 
 	/*
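A note on the get_user_pages_fast() hunk above: `nr_pages` is a plain `int`, so the old `nr_pages << PAGE_SHIFT` and `nr_pages*PAGE_SIZE` expressions were evaluated in 32-bit arithmetic and could overflow before ever reaching `start`; the new code widens to `unsigned long` first and also masks `start` down to a page boundary. A minimal userspace sketch of the arithmetic difference (illustrative values, not kernel code):

```c
/* Sketch of the int-shift overflow the gup.c hunk guards against.
 * With PAGE_SHIFT == 12 and nr_pages == 1 << 19, the un-widened shift
 * produces 1 << 31, which overflows a 32-bit int (formally undefined
 * behaviour; in practice a negative value that sign-extends). */
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	int nr_pages = 1 << 19;		/* 2 GiB worth of 4 KiB pages */
	unsigned long start = 0x1000;

	unsigned long bad  = start + (nr_pages << PAGE_SHIFT);
	unsigned long good = start + ((unsigned long)nr_pages << PAGE_SHIFT);

	printf("bad  end = %#lx\n", bad);	/* 0xffffffff80001000 on LP64 */
	printf("good end = %#lx\n", good);	/* 0x80001000 */
	return 0;
}
```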
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index a2c3f9cfa54..a56fc6c4394 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -827,7 +827,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 static int acpi_processor_get_throttling(struct acpi_processor *pr)
 {
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret;
 
 	if (!pr)
@@ -839,8 +838,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
 	 * Migrate task to the cpu pointed by pr.
 	 */
 	saved_mask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_next(new_mask, pr->id);
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
 	ret = pr->throttling.acpi_processor_get_throttling(pr);
 	/* restore the previous state */
 	set_cpus_allowed_ptr(current, &saved_mask);
@@ -989,7 +987,6 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
 int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 {
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret = 0;
 	unsigned int i;
 	struct acpi_processor *match_pr;
@@ -1028,8 +1025,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	 * it can be called only for the cpu pointed by pr.
 	 */
 	if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
-		cpumask_of_cpu_ptr_next(new_mask, pr->id);
-		set_cpus_allowed_ptr(current, new_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
 		ret = p_throttling->acpi_processor_set_throttling(pr,
 						t_state.target_state);
 	} else {
@@ -1060,8 +1056,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 			continue;
 		}
 		t_state.cpu = i;
-		cpumask_of_cpu_ptr_next(new_mask, i);
-		set_cpus_allowed_ptr(current, new_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		ret = match_pr->throttling.
 			acpi_processor_set_throttling(
 			match_pr, t_state.target_state);
diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c
index 7d500f82195..4c1820cad71 100644
--- a/drivers/char/pcmcia/ipwireless/hardware.c
+++ b/drivers/char/pcmcia/ipwireless/hardware.c
@@ -568,7 +568,7 @@ static struct ipw_rx_packet *pool_allocate(struct ipw_hardware *hw,
 			list_del(&packet->queue);
 		} else {
 			const int min_capacity =
-				ipwireless_ppp_mru(hw->network + 2);
+				ipwireless_ppp_mru(hw->network) + 2;
 			int new_capacity;
 
 			spin_unlock_irqrestore(&hw->lock, flags);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index c66817e7717..50a071f1c94 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -245,7 +245,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
 static int smi_request(struct smi_cmd *smi_cmd)
 {
 	cpumask_t old_mask;
-	cpumask_of_cpu_ptr(new_mask, 0);
 	int ret = 0;
 
 	if (smi_cmd->magic != SMI_CMD_MAGIC) {
@@ -256,7 +255,7 @@ static int smi_request(struct smi_cmd *smi_cmd)
 
 	/* SMI requires CPU 0 */
 	old_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(0));
 	if (smp_processor_id() != 0) {
 		dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
 			__func__);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 579b01ff82d..c3b4227f48a 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -229,11 +229,10 @@ xpc_hb_checker(void *ignore)
 	int last_IRQ_count = 0;
 	int new_IRQ_count;
 	int force_IRQ = 0;
-	cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU);
 
 	/* this thread was marked active by xpc_hb_init() */
 
-	set_cpus_allowed_ptr(current, cpumask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
 
 	/* set our heartbeating to other partitions into motion */
 	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 7a4409ab30e..a319a20ed44 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -8,6 +8,7 @@
 
 #include <linux/kthread.h>
 #include <linux/vmalloc.h>
+#include <linux/delay.h>
 
 static int qla24xx_vport_disable(struct fc_vport *, bool);
 
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 604e5f0a2d9..25eda71f4bf 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -148,7 +148,6 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi,
 	unsigned rfalarm;
 	unsigned send_at_once = MPC52xx_PSC_BUFSIZE;
 	unsigned recv_at_once;
-	unsigned bpw = mps->bits_per_word / 8;
 
 	if (!t->tx_buf && !t->rx_buf && t->len)
 		return -EINVAL;
@@ -164,22 +163,15 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi,
 		}
 
 		dev_dbg(&spi->dev, "send %d bytes...\n", send_at_once);
-		if (tx_buf) {
-			for (; send_at_once; sb++, send_at_once--) {
-				/* set EOF flag */
-				if (mps->bits_per_word
-						&& (sb + 1) % bpw == 0)
-					out_8(&psc->ircr2, 0x01);
+		for (; send_at_once; sb++, send_at_once--) {
+			/* set EOF flag before the last word is sent */
+			if (send_at_once == 1)
+				out_8(&psc->ircr2, 0x01);
+
+			if (tx_buf)
 				out_8(&psc->mpc52xx_psc_buffer_8, tx_buf[sb]);
-			}
-		} else {
-			for (; send_at_once; sb++, send_at_once--) {
-				/* set EOF flag */
-				if (mps->bits_per_word
-						&& ((sb + 1) % bpw) == 0)
-					out_8(&psc->ircr2, 0x01);
+			else
 				out_8(&psc->mpc52xx_psc_buffer_8, 0);
-			}
-		}
 
 
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 0885cc357a3..1c643c9e1f1 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -270,6 +270,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 	/* setup the master state. */
 
 	master->num_chipselect = hw->pdata->num_cs;
+	master->bus_num = pdata->bus_num;
 
 	/* setup the state for the bitbang driver */
 
diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c
index 4d0e28c5790..8d0212da451 100644
--- a/drivers/video/sh7760fb.c
+++ b/drivers/video/sh7760fb.c
@@ -152,6 +152,7 @@ static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
 		col |= ((*g) & 0xff) << 8;
 		col |= ((*b) & 0xff);
 		col &= SH7760FB_PALETTE_MASK;
+		iowrite32(col, par->base + LDPR(s));
 
 		if (s < 16)
 			((u32 *) (info->pseudo_palette))[s] = s;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058..c3e174b35fe 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
 	bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
 					SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 }
-EXPORT_SYMBOL(bio_integrity_init_slab);
 
 static int __init integrity_init(void)
 {
diff --git a/fs/buffer.c b/fs/buffer.c
index f9580501963..ca12a6bb82b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(generic_write_end);
 
 /*
+ * block_is_partially_uptodate checks whether buffers within a page are
+ * uptodate or not.
+ *
+ * Returns true if all buffers which correspond to a file portion
+ * we want to read are uptodate.
+ */
+int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+					unsigned long from)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned block_start, block_end, blocksize;
+	unsigned to;
+	struct buffer_head *bh, *head;
+	int ret = 1;
+
+	if (!page_has_buffers(page))
+		return 0;
+
+	blocksize = 1 << inode->i_blkbits;
+	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+	to = from + to;
+	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
+		return 0;
+
+	head = page_buffers(page);
+	bh = head;
+	block_start = 0;
+	do {
+		block_end = block_start + blocksize;
+		if (block_end > from && block_start < to) {
+			if (!buffer_uptodate(bh)) {
+				ret = 0;
+				break;
+			}
+			if (block_end >= to)
+				break;
+		}
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	return ret;
+}
+EXPORT_SYMBOL(block_is_partially_uptodate);
+
+/*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
  * Reads the page asynchronously --- the unlock_buffer() and
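The helper above is exported so filesystems can wire it into the new `->is_partially_uptodate` hook that this same patch adds to `struct address_space_operations` (see the include/linux/fs.h hunk below). As a hedged illustration of the intended call pattern — this is not the verbatim mm/filemap.c change the commit also makes, and the real read path additionally takes the page lock before asking:

```c
/* Illustrative sketch only.  A reader that wants bytes starting at
 * 'offset' within a page may accept a page that is not fully uptodate,
 * provided every buffer_head backing the requested range is uptodate. */
static int page_usable_for_read(struct address_space *mapping,
				struct page *page,
				read_descriptor_t *desc,
				unsigned long offset)
{
	if (PageUptodate(page))
		return 1;
	if (!mapping->a_ops->is_partially_uptodate)
		return 0;
	return mapping->a_ops->is_partially_uptodate(page, desc, offset);
}
```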
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7b99917ffad..06db79d05c1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
 {
 	struct inode *ecryptfs_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
-	char *enc_extent_virt = NULL;
-	struct page *enc_extent_page;
+	char *enc_extent_virt;
+	struct page *enc_extent_page = NULL;
 	loff_t extent_offset;
 	int rc = 0;
 
@@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
 			page->index);
 		goto out;
 	}
-	enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
-	if (!enc_extent_virt) {
+	enc_extent_page = alloc_page(GFP_USER);
+	if (!enc_extent_page) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error allocating memory for "
 				"encrypted extent\n");
 		goto out;
 	}
-	enc_extent_page = virt_to_page(enc_extent_virt);
+	enc_extent_virt = kmap(enc_extent_page);
 	for (extent_offset = 0;
 	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
 	     extent_offset++) {
@@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
 		}
 	}
 out:
-	kfree(enc_extent_virt);
+	if (enc_extent_page) {
+		kunmap(enc_extent_page);
+		__free_page(enc_extent_page);
+	}
 	return rc;
 }
 
@@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
 {
 	struct inode *ecryptfs_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
-	char *enc_extent_virt = NULL;
-	struct page *enc_extent_page;
+	char *enc_extent_virt;
+	struct page *enc_extent_page = NULL;
 	unsigned long extent_offset;
 	int rc = 0;
 
@@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
 			page->index);
 		goto out;
 	}
-	enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
-	if (!enc_extent_virt) {
+	enc_extent_page = alloc_page(GFP_USER);
+	if (!enc_extent_page) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error allocating memory for "
 				"encrypted extent\n");
 		goto out;
 	}
-	enc_extent_page = virt_to_page(enc_extent_virt);
+	enc_extent_virt = kmap(enc_extent_page);
 	for (extent_offset = 0;
 	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
 	     extent_offset++) {
@@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
 		}
 	}
 out:
-	kfree(enc_extent_virt);
+	if (enc_extent_page) {
+		kunmap(enc_extent_page);
+		__free_page(enc_extent_page);
+	}
 	return rc;
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index 9696bbf0f0b..32993beecbe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -32,6 +32,7 @@
 #include <linux/swap.h>
 #include <linux/string.h>
 #include <linux/init.h>
+#include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd7..991d6dfeb51 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3bf07d70b91..507d8689b11 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
 }
 
 static const struct address_space_operations ext3_ordered_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_ordered_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_ordered_write_end,
 	.bmap			= ext3_bmap,
 	.invalidatepage		= ext3_invalidatepage,
 	.releasepage		= ext3_releasepage,
 	.direct_IO		= ext3_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext3_writeback_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_writeback_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_writeback_write_end,
 	.bmap			= ext3_bmap,
 	.invalidatepage		= ext3_invalidatepage,
 	.releasepage		= ext3_releasepage,
 	.direct_IO		= ext3_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext3_journalled_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_journalled_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_journalled_write_end,
 	.set_page_dirty		= ext3_journalled_set_page_dirty,
 	.bmap			= ext3_bmap,
 	.invalidatepage		= ext3_invalidatepage,
 	.releasepage		= ext3_releasepage,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df09..9843b046c23 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 }
 
 static const struct address_space_operations ext4_ordered_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_normal_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_ordered_write_end,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
 	.direct_IO		= ext4_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_writeback_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_normal_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_writeback_write_end,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
 	.direct_IO		= ext4_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_journalled_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_journalled_write_end,
 	.set_page_dirty		= ext4_journalled_set_page_dirty,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_da_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_da_writepage,
 	.writepages		= ext4_da_writepages,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_da_write_begin,
 	.write_end		= ext4_da_write_end,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_da_invalidatepage,
 	.releasepage		= ext4_releasepage,
 	.direct_IO		= ext4_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 void ext4_set_aops(struct inode *inode)
diff --git a/include/asm-arm/arch-s3c2410/spi.h b/include/asm-arm/arch-s3c2410/spi.h
index 352d33860b6..442169887d3 100644
--- a/include/asm-arm/arch-s3c2410/spi.h
+++ b/include/asm-arm/arch-s3c2410/spi.h
@@ -16,6 +16,7 @@
 struct s3c2410_spi_info {
 	unsigned long		 pin_cs;	/* simple gpio cs */
 	unsigned int		 num_cs;	/* total chipselects */
+	int			 bus_num;	/* bus number to use. */
 
 	void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol);
 };
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index c764a8fcb05..0f99ad38b01 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -2,6 +2,7 @@
 #define _ASM_GENERIC_GPIO_H
 
 #include <linux/types.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_GPIOLIB
 
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 087325ede76..a7cdc48e8b7 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -5,6 +5,8 @@
 
 #include <asm-generic/pgtable-nopud.h>
 
+struct mm_struct;
+
 #define __PAGETABLE_PMD_FOLDED
 
 /*
@@ -54,7 +56,9 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
  * inside the pud, so has no extra memory associated with it.
  */
 #define pmd_alloc_one(mm, address)		NULL
-#define pmd_free(mm, x)				do { } while (0)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+}
 #define __pmd_free_tlb(tlb, x)			do { } while (0)
 
 #undef  pmd_addr_end
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 82aa36c53ea..50cfe8ceb47 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
+int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+				unsigned long from);
 int block_write_begin(struct file *, struct address_space *,
 				loff_t, unsigned, unsigned,
 				struct page **, void **, get_block_t*);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1b5c98e7fef..96d0509fb8d 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -62,15 +62,7 @@
  * int next_cpu_nr(cpu, mask)		Next cpu past 'cpu', or nr_cpu_ids
  *
  * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
-*ifdef CONFIG_HAS_CPUMASK_OF_CPU
- * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t *v
- * cpumask_of_cpu_ptr_next(v, cpu)	Sets v = &cpumask_of_cpu_map[cpu]
- * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
-*else
- * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t _v and *v = &_v
- * cpumask_of_cpu_ptr_next(v, cpu)	Sets _v = cpumask_of_cpu(cpu)
- * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
-*endif
+ *					(can be used as an lvalue)
  * CPU_MASK_ALL				Initializer - all bits set
  * CPU_MASK_NONE			Initializer - no bits set
  * unsigned long *cpus_addr(mask)	Array of unsigned long's in mask
@@ -273,37 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
 
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+	cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+{
+	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+	p -= cpu / BITS_PER_LONG;
+	return (const cpumask_t *)p;
+}
+
+/*
+ * In cases where we take the address of the cpumask immediately,
+ * gcc optimizes it out (it's a constant) and there's no huge stack
+ * variable created:
+ */
+#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); })
 
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-extern cpumask_t *cpumask_of_cpu_map;
-#define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])
-#define	cpumask_of_cpu_ptr(v, cpu)					\
-		const cpumask_t *v = &cpumask_of_cpu(cpu)
-#define	cpumask_of_cpu_ptr_declare(v)					\
-		const cpumask_t *v
-#define cpumask_of_cpu_ptr_next(v, cpu)					\
-		v = &cpumask_of_cpu(cpu)
-#else
-#define cpumask_of_cpu(cpu)						\
-({									\
-	typeof(_unused_cpumask_arg_) m;					\
-	if (sizeof(m) == sizeof(unsigned long)) {			\
-		m.bits[0] = 1UL<<(cpu);					\
-	} else {							\
-		cpus_clear(m);						\
-		cpu_set((cpu), m);					\
-	}								\
-	m;								\
-})
-#define	cpumask_of_cpu_ptr(v, cpu)					\
-		cpumask_t _##v = cpumask_of_cpu(cpu);			\
-		const cpumask_t *v = &_##v
-#define	cpumask_of_cpu_ptr_declare(v)					\
-		cpumask_t _##v;						\
-		const cpumask_t *v = &_##v
-#define cpumask_of_cpu_ptr_next(v, cpu)					\
-		_##v = cpumask_of_cpu(cpu)
-#endif
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
 
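The table behind the new `get_cpu_mask()` rewards a second look. Row `1 + b` of `cpu_bit_bitmap` is a full NR_CPUS-bit bitmap whose word 0 has bit `b` set and whose remaining words are zero; because the rows are stored contiguously, stepping the returned pointer back by `cpu / BITS_PER_LONG` words slides that set bit into the correct word of the viewed mask, while the surrounding words read from the all-zero tails of the adjacent rows. A standalone userspace sketch of the same trick (the kernel initializes the table at compile time; here it is filled at runtime for brevity):

```c
#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define NR_CPUS		128
#define BITS_PER_LONG	(CHAR_BIT * (int)sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Row k (k = 1..BITS_PER_LONG) is a full NR_CPUS bitmap with only
 * bit (k - 1) of word 0 set; row 0 stays all-zero padding. */
static unsigned long cpu_bit_bitmap[BITS_PER_LONG + 1][BITS_TO_LONGS(NR_CPUS)];

static const unsigned long *get_cpu_mask(unsigned int cpu)
{
	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

	/* Relies on the rows being contiguous: the words around the
	 * set bit are the all-zero tails of the neighbouring rows. */
	p -= cpu / BITS_PER_LONG;
	return p;
}

int main(void)
{
	unsigned int cpu, w;

	for (w = 1; w <= (unsigned int)BITS_PER_LONG; w++)
		cpu_bit_bitmap[w][0] = 1UL << (w - 1);

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		const unsigned long *mask = get_cpu_mask(cpu);

		for (w = 0; w < BITS_TO_LONGS(NR_CPUS); w++) {
			unsigned long want = (cpu / BITS_PER_LONG == w)
					? 1UL << (cpu % BITS_PER_LONG) : 0UL;
			assert(mask[w] == want);
		}
	}
	printf("all %d single-cpu masks verified\n", NR_CPUS);
	return 0;
}
```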
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8252b045e62..580b513668f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i)
443 return i->count; 443 return i->count;
444} 444}
445 445
446/*
447 * "descriptor" for what we're up to with a read.
448 * This allows us to use the same read code yet
449 * have multiple different users of the data that
450 * we read from a file.
451 *
452 * The simplest case just copies the data to user
453 * mode.
454 */
455typedef struct {
456 size_t written;
457 size_t count;
458 union {
459 char __user *buf;
460 void *data;
461 } arg;
462 int error;
463} read_descriptor_t;
464
465typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
466 unsigned long, unsigned long);
446 467
447struct address_space_operations { 468struct address_space_operations {
448 int (*writepage)(struct page *page, struct writeback_control *wbc); 469 int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -484,6 +505,8 @@ struct address_space_operations {
484 int (*migratepage) (struct address_space *, 505 int (*migratepage) (struct address_space *,
485 struct page *, struct page *); 506 struct page *, struct page *);
486 int (*launder_page) (struct page *); 507 int (*launder_page) (struct page *);
508 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
509 unsigned long);
487}; 510};
488 511
489/* 512/*
@@ -1198,27 +1221,6 @@ struct block_device_operations {
1198 struct module *owner; 1221 struct module *owner;
1199}; 1222};
1200 1223
1201/*
1202 * "descriptor" for what we're up to with a read.
1203 * This allows us to use the same read code yet
1204 * have multiple different users of the data that
1205 * we read from a file.
1206 *
1207 * The simplest case just copies the data to user
1208 * mode.
1209 */
1210typedef struct {
1211 size_t written;
1212 size_t count;
1213 union {
1214 char __user * buf;
1215 void *data;
1216 } arg;
1217 int error;
1218} read_descriptor_t;
1219
1220typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long);
1221
1222/* These macros are for out of kernel modules to test that 1224/* These macros are for out of kernel modules to test that
1223 * the kernel supports the unlocked_ioctl and compat_ioctl 1225 * the kernel supports the unlocked_ioctl and compat_ioctl
1224 * fields in struct file_operations. */ 1226 * fields in struct file_operations. */
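
The point of the move plus the new ->is_partially_uptodate() hook: the read path (see the mm/filemap.c hunk below) can now accept a locked page whose PG_uptodate flag is clear, provided the blocks the read actually touches are uptodate — useful when the block size is smaller than the page size. A sketch of the shape such a method takes for a buffer_head-backed filesystem, modeled on the generic helper this series adds in fs/buffer.c (per the diffstat); the name is hypothetical:

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>

/* called with the page locked, as the filemap.c caller guarantees */
static int my_is_partially_uptodate(struct page *page,
				    read_descriptor_t *desc,
				    unsigned long from)
{
	unsigned block_start, block_end, blocksize, to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	/* clamp the check to the bytes this read actually wants */
	to = from + min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		/* a stale block overlapping [from, to) fails the check */
		if (block_end > from && block_start < to &&
		    !buffer_uptodate(bh)) {
			ret = 0;
			break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head && block_start < to);

	return ret;
}

A filesystem opts in by setting .is_partially_uptodate in its address_space_operations; the ext2/ext3/ext4 entries in the diffstat wire up the generic buffer.c variant.
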
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e695eaab4c..866a3dbe5c7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
1104 unsigned long addr, unsigned long len, pgoff_t pgoff); 1104 unsigned long addr, unsigned long len, pgoff_t pgoff);
1105extern void exit_mmap(struct mm_struct *); 1105extern void exit_mmap(struct mm_struct *);
1106 1106
1107extern int mm_take_all_locks(struct mm_struct *mm);
1108extern void mm_drop_all_locks(struct mm_struct *mm);
1109
1107#ifdef CONFIG_PROC_FS 1110#ifdef CONFIG_PROC_FS
1108/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ 1111/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
1109extern void added_exe_file_vma(struct mm_struct *mm); 1112extern void added_exe_file_vma(struct mm_struct *mm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 746f975b58e..386edbe2cb4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
10#include <linux/rbtree.h> 10#include <linux/rbtree.h>
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/cpumask.h>
13#include <asm/page.h> 14#include <asm/page.h>
14#include <asm/mmu.h> 15#include <asm/mmu.h>
15 16
@@ -253,6 +254,9 @@ struct mm_struct {
253 struct file *exe_file; 254 struct file *exe_file;
254 unsigned long num_exe_file_vmas; 255 unsigned long num_exe_file_vmas;
255#endif 256#endif
257#ifdef CONFIG_MMU_NOTIFIER
258 struct mmu_notifier_mm *mmu_notifier_mm;
259#endif
256}; 260};
257 261
258#endif /* _LINUX_MM_TYPES_H */ 262#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
new file mode 100644
index 00000000000..b77486d152c
--- /dev/null
+++ b/include/linux/mmu_notifier.h
@@ -0,0 +1,279 @@
1#ifndef _LINUX_MMU_NOTIFIER_H
2#define _LINUX_MMU_NOTIFIER_H
3
4#include <linux/list.h>
5#include <linux/spinlock.h>
6#include <linux/mm_types.h>
7
8struct mmu_notifier;
9struct mmu_notifier_ops;
10
11#ifdef CONFIG_MMU_NOTIFIER
12
13/*
14 * The mmu notifier_mm structure is allocated and installed in
15 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
16 * critical section and it's released only when mm_count reaches zero
17 * in mmdrop().
18 */
19struct mmu_notifier_mm {
 20 /* all mmu notifiers registered in this mm are queued in this list */
21 struct hlist_head list;
22 /* to serialize the list modifications and hlist_unhashed */
23 spinlock_t lock;
24};
25
26struct mmu_notifier_ops {
27 /*
28 * Called either by mmu_notifier_unregister or when the mm is
29 * being destroyed by exit_mmap, always before all pages are
30 * freed. This can run concurrently with other mmu notifier
31 * methods (the ones invoked outside the mm context) and it
32 * should tear down all secondary mmu mappings and freeze the
 33 * secondary mmu. If this method isn't implemented you have to
34 * be sure that nothing could possibly write to the pages
35 * through the secondary mmu by the time the last thread with
36 * tsk->mm == mm exits.
37 *
38 * As side note: the pages freed after ->release returns could
39 * be immediately reallocated by the gart at an alias physical
40 * address with a different cache model, so if ->release isn't
41 * implemented because all _software_ driven memory accesses
42 * through the secondary mmu are terminated by the time the
 43 * last thread of this mm quits, you also have to be sure that
44 * speculative _hardware_ operations can't allocate dirty
45 * cachelines in the cpu that could not be snooped and made
46 * coherent with the other read and write operations happening
 47 * through the gart alias address, thus leading to memory
48 * corruption.
49 */
50 void (*release)(struct mmu_notifier *mn,
51 struct mm_struct *mm);
52
53 /*
 54 * clear_flush_young is called when the VM test-and-clears
 55 * the young/accessed bitflag in the
56 * pte. This way the VM will provide proper aging to the
57 * accesses to the page through the secondary MMUs and not
58 * only to the ones through the Linux pte.
59 */
60 int (*clear_flush_young)(struct mmu_notifier *mn,
61 struct mm_struct *mm,
62 unsigned long address);
63
64 /*
65 * Before this is invoked any secondary MMU is still ok to
66 * read/write to the page previously pointed to by the Linux
67 * pte because the page hasn't been freed yet and it won't be
 68 * freed until this returns. If required, set_page_dirty has to
 69 * be called from within this method.
70 */
71 void (*invalidate_page)(struct mmu_notifier *mn,
72 struct mm_struct *mm,
73 unsigned long address);
74
75 /*
76 * invalidate_range_start() and invalidate_range_end() must be
77 * paired and are called only when the mmap_sem and/or the
78 * locks protecting the reverse maps are held. The subsystem
79 * must guarantee that no additional references are taken to
80 * the pages in the range established between the call to
81 * invalidate_range_start() and the matching call to
82 * invalidate_range_end().
83 *
84 * Invalidation of multiple concurrent ranges may be
85 * optionally permitted by the driver. Either way the
86 * establishment of sptes is forbidden in the range passed to
 87 * invalidate_range_start/end for the whole duration of the
 88 * invalidate_range_start/end critical section.
89 *
90 * invalidate_range_start() is called when all pages in the
91 * range are still mapped and have at least a refcount of one.
92 *
93 * invalidate_range_end() is called when all pages in the
94 * range have been unmapped and the pages have been freed by
95 * the VM.
96 *
97 * The VM will remove the page table entries and potentially
98 * the page between invalidate_range_start() and
99 * invalidate_range_end(). If the page must not be freed
100 * because of pending I/O or other circumstances then the
101 * invalidate_range_start() callback (or the initial mapping
102 * by the driver) must make sure that the refcount is kept
103 * elevated.
104 *
105 * If the driver increases the refcount when the pages are
106 * initially mapped into an address space then either
107 * invalidate_range_start() or invalidate_range_end() may
108 * decrease the refcount. If the refcount is decreased on
109 * invalidate_range_start() then the VM can free pages as page
110 * table entries are removed. If the refcount is only
111 * dropped on invalidate_range_end() then the driver itself
112 * will drop the last refcount but it must take care to flush
113 * any secondary tlb before doing the final free on the
114 * page. Pages will no longer be referenced by the linux
115 * address space but may still be referenced by sptes until
116 * the last refcount is dropped.
117 */
118 void (*invalidate_range_start)(struct mmu_notifier *mn,
119 struct mm_struct *mm,
120 unsigned long start, unsigned long end);
121 void (*invalidate_range_end)(struct mmu_notifier *mn,
122 struct mm_struct *mm,
123 unsigned long start, unsigned long end);
124};
125
126/*
127 * The notifier chains are protected by mmap_sem and/or the reverse map
128 * semaphores. Notifier chains are only changed when all reverse maps and
129 * the mmap_sem locks are taken.
130 *
131 * Therefore notifier chains can only be traversed when either
132 *
133 * 1. mmap_sem is held.
134 * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
135 * 3. No other concurrent thread can access the list (release)
136 */
137struct mmu_notifier {
138 struct hlist_node hlist;
139 const struct mmu_notifier_ops *ops;
140};
141
142static inline int mm_has_notifiers(struct mm_struct *mm)
143{
144 return unlikely(mm->mmu_notifier_mm);
145}
146
147extern int mmu_notifier_register(struct mmu_notifier *mn,
148 struct mm_struct *mm);
149extern int __mmu_notifier_register(struct mmu_notifier *mn,
150 struct mm_struct *mm);
151extern void mmu_notifier_unregister(struct mmu_notifier *mn,
152 struct mm_struct *mm);
153extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
154extern void __mmu_notifier_release(struct mm_struct *mm);
155extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
156 unsigned long address);
157extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
158 unsigned long address);
159extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
160 unsigned long start, unsigned long end);
161extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
162 unsigned long start, unsigned long end);
163
164static inline void mmu_notifier_release(struct mm_struct *mm)
165{
166 if (mm_has_notifiers(mm))
167 __mmu_notifier_release(mm);
168}
169
170static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
171 unsigned long address)
172{
173 if (mm_has_notifiers(mm))
174 return __mmu_notifier_clear_flush_young(mm, address);
175 return 0;
176}
177
178static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
179 unsigned long address)
180{
181 if (mm_has_notifiers(mm))
182 __mmu_notifier_invalidate_page(mm, address);
183}
184
185static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
186 unsigned long start, unsigned long end)
187{
188 if (mm_has_notifiers(mm))
189 __mmu_notifier_invalidate_range_start(mm, start, end);
190}
191
192static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
193 unsigned long start, unsigned long end)
194{
195 if (mm_has_notifiers(mm))
196 __mmu_notifier_invalidate_range_end(mm, start, end);
197}
198
199static inline void mmu_notifier_mm_init(struct mm_struct *mm)
200{
201 mm->mmu_notifier_mm = NULL;
202}
203
204static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
205{
206 if (mm_has_notifiers(mm))
207 __mmu_notifier_mm_destroy(mm);
208}
209
210/*
211 * These two macros will eventually replace ptep_clear_flush.
212 * ptep_clear_flush is implemented as a macro itself, so this also is
213 * implemented as a macro until ptep_clear_flush is converted to an
214 * inline function, to diminish the risk of compilation failure. The
215 * invalidate_page method over time can be moved outside the PT lock
216 * and these two macros can be later removed.
217 */
218#define ptep_clear_flush_notify(__vma, __address, __ptep) \
219({ \
220 pte_t __pte; \
221 struct vm_area_struct *___vma = __vma; \
222 unsigned long ___address = __address; \
223 __pte = ptep_clear_flush(___vma, ___address, __ptep); \
224 mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
225 __pte; \
226})
227
228#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
229({ \
230 int __young; \
231 struct vm_area_struct *___vma = __vma; \
232 unsigned long ___address = __address; \
233 __young = ptep_clear_flush_young(___vma, ___address, __ptep); \
234 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
235 ___address); \
236 __young; \
237})
238
239#else /* CONFIG_MMU_NOTIFIER */
240
241static inline void mmu_notifier_release(struct mm_struct *mm)
242{
243}
244
245static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
246 unsigned long address)
247{
248 return 0;
249}
250
251static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
252 unsigned long address)
253{
254}
255
256static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
257 unsigned long start, unsigned long end)
258{
259}
260
261static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
262 unsigned long start, unsigned long end)
263{
264}
265
266static inline void mmu_notifier_mm_init(struct mm_struct *mm)
267{
268}
269
270static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
271{
272}
273
274#define ptep_clear_flush_young_notify ptep_clear_flush_young
275#define ptep_clear_flush_notify ptep_clear_flush
276
277#endif /* CONFIG_MMU_NOTIFIER */
278
279#endif /* _LINUX_MMU_NOTIFIER_H */
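
Putting the pieces together, a secondary-MMU driver only has to embed a struct mmu_notifier, fill in an ops table, and register. A minimal sketch with hypothetical names — every hook is optional, since the mm/mmu_notifier.c callers below test each function pointer before invoking it:

#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

struct my_smmu {
	struct mmu_notifier mn;		/* embedded, as the API expects */
	/* ... driver-private spte state ... */
};

static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct my_smmu *smmu = container_of(mn, struct my_smmu, mn);

	/* tear down every secondary mapping; the pages are about to go */
	(void)smmu;
}

static void my_range_start(struct mmu_notifier *mn, struct mm_struct *mm,
			   unsigned long start, unsigned long end)
{
	/* flush sptes in [start, end) and block new ones */
}

static void my_range_end(struct mmu_notifier *mn, struct mm_struct *mm,
			 unsigned long start, unsigned long end)
{
	/* sptes may be established again from here on */
}

static const struct mmu_notifier_ops my_ops = {
	.release		= my_release,
	.invalidate_range_start	= my_range_start,
	.invalidate_range_end	= my_range_end,
};

static int my_attach(struct my_smmu *smmu)
{
	smmu->mn.ops = &my_ops;
	/* current->mm: the mm_users pin is held implicitly */
	return mmu_notifier_register(&smmu->mn, current->mm);
}
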
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a81d8189042..a39b38ccdc9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
20 */ 20 */
21#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ 21#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
22#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ 22#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */
23#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */
23 24
24static inline void mapping_set_error(struct address_space *mapping, int error) 25static inline void mapping_set_error(struct address_space *mapping, int error)
25{ 26{
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index b0f39be08b6..eb4443c7e05 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -98,6 +98,34 @@ static inline void list_del_rcu(struct list_head *entry)
98} 98}
99 99
100/** 100/**
101 * hlist_del_init_rcu - deletes entry from hash list with re-initialization
102 * @n: the element to delete from the hash list.
103 *
104 * Note: hlist_unhashed() on the node returns true after this. It is
105 * useful for RCU-based lock-free read traversal if the writer side
106 * must know if the list entry is still hashed or already unhashed.
107 *
108 * In particular, it means that we cannot poison the forward pointers
109 * that may still be used for walking the hash list and we can only
110 * zero the pprev pointer so hlist_unhashed() will return true after
111 * this.
112 *
113 * The caller must take whatever precautions are necessary (such as
114 * holding appropriate locks) to avoid racing with another
115 * list-mutation primitive, such as hlist_add_head_rcu() or
116 * hlist_del_rcu(), running on this same list. However, it is
117 * perfectly legal to run concurrently with the _rcu list-traversal
118 * primitives, such as hlist_for_each_entry_rcu().
119 */
120static inline void hlist_del_init_rcu(struct hlist_node *n)
121{
122 if (!hlist_unhashed(n)) {
123 __hlist_del(n);
124 n->pprev = NULL;
125 }
126}
127
128/**
101 * list_replace_rcu - replace old entry by new one 129 * list_replace_rcu - replace old entry by new one
102 * @old : the element to be replaced 130 * @old : the element to be replaced
103 * @new : the new element to insert 131 * @new : the new element to insert
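
The new primitive exists for exactly the pattern mm/mmu_notifier.c uses further down in this diff: delete under a lock with hlist_del_init_rcu() so a later writer can test hlist_unhashed() under the same lock, while RCU readers keep traversing. A generic sketch with hypothetical names:

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct my_obj {
	struct hlist_node node;
	/* ... payload ... */
};

static DEFINE_SPINLOCK(my_lock);

static void my_remove(struct my_obj *obj)
{
	spin_lock(&my_lock);
	if (!hlist_unhashed(&obj->node))	/* not already removed? */
		hlist_del_init_rcu(&obj->node);
	spin_unlock(&my_lock);

	/* wait out any hlist_for_each_entry_rcu() walkers, then free */
	synchronize_rcu();
	kfree(obj);
}
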
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692ac5b..69407f85e10 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
26 */ 26 */
27struct anon_vma { 27struct anon_vma {
28 spinlock_t lock; /* Serialize access to vma list */ 28 spinlock_t lock; /* Serialize access to vma list */
29 /*
30 * NOTE: the LSB of the head.next is set by
31 * mm_take_all_locks() _after_ taking the above lock. So the
32 * head must only be read/written after taking the above lock
 33 * to be sure to see a valid next pointer. The LSB itself
 34 * is serialized by a system-wide lock only visible to
35 * mm_take_all_locks() (mm_all_locks_mutex).
36 */
29 struct list_head head; /* List of private "related" vmas */ 37 struct list_head head; /* List of private "related" vmas */
30}; 38};
31 39
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 29510d68338..e202a68d1cc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -455,3 +455,28 @@ out:
455#endif /* CONFIG_PM_SLEEP_SMP */ 455#endif /* CONFIG_PM_SLEEP_SMP */
456 456
457#endif /* CONFIG_SMP */ 457#endif /* CONFIG_SMP */
458
459/*
460 * cpu_bit_bitmap[] is a special, "compressed" data structure that
461 * represents all NR_CPUS-bit binary values of the form 1<<nr.
462 *
463 * It is used by cpumask_of_cpu() to get a constant address to a CPU
464 * mask value that has a single bit set only.
465 */
466
467/* cpu_bit_bitmap[0] is empty - so get_cpu_mask() can safely back up into it */
468#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
469#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
470#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
471#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
472
473const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
474
475 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
476 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
477#if BITS_PER_LONG > 32
478 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
479 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
480#endif
481};
482EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8214ba7c8bb..7ce2ebe8479 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
27#include <linux/key.h> 27#include <linux/key.h>
28#include <linux/binfmts.h> 28#include <linux/binfmts.h>
29#include <linux/mman.h> 29#include <linux/mman.h>
30#include <linux/mmu_notifier.h>
30#include <linux/fs.h> 31#include <linux/fs.h>
31#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
32#include <linux/capability.h> 33#include <linux/capability.h>
@@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
414 415
415 if (likely(!mm_alloc_pgd(mm))) { 416 if (likely(!mm_alloc_pgd(mm))) {
416 mm->def_flags = 0; 417 mm->def_flags = 0;
418 mmu_notifier_mm_init(mm);
417 return mm; 419 return mm;
418 } 420 }
419 421
@@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
446 BUG_ON(mm == &init_mm); 448 BUG_ON(mm == &init_mm);
447 mm_free_pgd(mm); 449 mm_free_pgd(mm);
448 destroy_context(mm); 450 destroy_context(mm);
451 mmu_notifier_mm_destroy(mm);
449 free_mm(mm); 452 free_mm(mm);
450} 453}
451EXPORT_SYMBOL_GPL(__mmdrop); 454EXPORT_SYMBOL_GPL(__mmdrop);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index bf43284d685..80c4336f418 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -196,12 +196,10 @@ static int tick_check_new_device(struct clock_event_device *newdev)
196 struct tick_device *td; 196 struct tick_device *td;
197 int cpu, ret = NOTIFY_OK; 197 int cpu, ret = NOTIFY_OK;
198 unsigned long flags; 198 unsigned long flags;
199 cpumask_of_cpu_ptr_declare(cpumask);
200 199
201 spin_lock_irqsave(&tick_device_lock, flags); 200 spin_lock_irqsave(&tick_device_lock, flags);
202 201
203 cpu = smp_processor_id(); 202 cpu = smp_processor_id();
204 cpumask_of_cpu_ptr_next(cpumask, cpu);
205 if (!cpu_isset(cpu, newdev->cpumask)) 203 if (!cpu_isset(cpu, newdev->cpumask))
206 goto out_bc; 204 goto out_bc;
207 205
@@ -209,7 +207,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
209 curdev = td->evtdev; 207 curdev = td->evtdev;
210 208
211 /* cpu local device ? */ 209 /* cpu local device ? */
212 if (!cpus_equal(newdev->cpumask, *cpumask)) { 210 if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
213 211
214 /* 212 /*
215 * If the cpu affinity of the device interrupt can not 213 * If the cpu affinity of the device interrupt can not
@@ -222,7 +220,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
222 * If we have a cpu local device already, do not replace it 220 * If we have a cpu local device already, do not replace it
223 * by a non cpu local device 221 * by a non cpu local device
224 */ 222 */
225 if (curdev && cpus_equal(curdev->cpumask, *cpumask)) 223 if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
226 goto out_bc; 224 goto out_bc;
227 } 225 }
228 226
@@ -254,7 +252,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
254 curdev = NULL; 252 curdev = NULL;
255 } 253 }
256 clockevents_exchange_device(curdev, newdev); 254 clockevents_exchange_device(curdev, newdev);
257 tick_setup_device(td, newdev, cpu, cpumask); 255 tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
258 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 256 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
259 tick_oneshot_notify(); 257 tick_oneshot_notify();
260 258
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index ce2d723c10e..bb948e52ce2 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,9 +213,7 @@ static void start_stack_timers(void)
213 int cpu; 213 int cpu;
214 214
215 for_each_online_cpu(cpu) { 215 for_each_online_cpu(cpu) {
216 cpumask_of_cpu_ptr(new_mask, cpu); 216 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
217
218 set_cpus_allowed_ptr(current, new_mask);
219 start_stack_timer(cpu); 217 start_stack_timer(cpu);
220 } 218 }
221 set_cpus_allowed_ptr(current, &saved_mask); 219 set_cpus_allowed_ptr(current, &saved_mask);
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 35136671b21..26187edcc7e 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -15,7 +15,6 @@
15#include <linux/module.h> 15#include <linux/module.h>
16 16
17static DEFINE_SPINLOCK(ratelimit_lock); 17static DEFINE_SPINLOCK(ratelimit_lock);
18static unsigned long flags;
19 18
20/* 19/*
21 * __ratelimit - rate limiting 20 * __ratelimit - rate limiting
@@ -26,6 +25,8 @@ static unsigned long flags;
26 */ 25 */
27int __ratelimit(struct ratelimit_state *rs) 26int __ratelimit(struct ratelimit_state *rs)
28{ 27{
28 unsigned long flags;
29
29 if (!rs->interval) 30 if (!rs->interval)
30 return 1; 31 return 1;
31 32
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index c4381d9516f..0f8fc22ed10 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -11,7 +11,6 @@ notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count(); 12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 13 int this_cpu = raw_smp_processor_id();
14 cpumask_of_cpu_ptr_declare(this_mask);
15 14
16 if (likely(preempt_count)) 15 if (likely(preempt_count))
17 goto out; 16 goto out;
@@ -23,9 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
23 * Kernel threads bound to a single CPU can safely use 22 * Kernel threads bound to a single CPU can safely use
24 * smp_processor_id(): 23 * smp_processor_id():
25 */ 24 */
26 cpumask_of_cpu_ptr_next(this_mask, this_cpu); 25 if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu)))
27
28 if (cpus_equal(current->cpus_allowed, *this_mask))
29 goto out; 26 goto out;
30 27
31 /* 28 /*
diff --git a/mm/Kconfig b/mm/Kconfig
index efee5d379df..446c6588c75 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -208,3 +208,6 @@ config NR_QUICK
208config VIRT_TO_BUS 208config VIRT_TO_BUS
209 def_bool y 209 def_bool y
210 depends on !ARCH_NO_VIRT_TO_BUS 210 depends on !ARCH_NO_VIRT_TO_BUS
211
212config MMU_NOTIFIER
213 bool
diff --git a/mm/Makefile b/mm/Makefile
index 06ca2381fef..da4ccf015ae 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o 25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o 26obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
27obj-$(CONFIG_SLOB) += slob.o 27obj-$(CONFIG_SLOB) += slob.o
28obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
28obj-$(CONFIG_SLAB) += slab.o 29obj-$(CONFIG_SLAB) += slab.o
29obj-$(CONFIG_SLUB) += slub.o 30obj-$(CONFIG_SLUB) += slub.o
30obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o 31obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 5de7633e1db..42bbc6909ba 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1023,8 +1023,17 @@ find_page:
1023 ra, filp, page, 1023 ra, filp, page,
1024 index, last_index - index); 1024 index, last_index - index);
1025 } 1025 }
1026 if (!PageUptodate(page)) 1026 if (!PageUptodate(page)) {
1027 goto page_not_up_to_date; 1027 if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
1028 !mapping->a_ops->is_partially_uptodate)
1029 goto page_not_up_to_date;
1030 if (TestSetPageLocked(page))
1031 goto page_not_up_to_date;
1032 if (!mapping->a_ops->is_partially_uptodate(page,
1033 desc, offset))
1034 goto page_not_up_to_date_locked;
1035 unlock_page(page);
1036 }
1028page_ok: 1037page_ok:
1029 /* 1038 /*
1030 * i_size must be checked after we know the page is Uptodate. 1039 * i_size must be checked after we know the page is Uptodate.
@@ -1094,6 +1103,7 @@ page_not_up_to_date:
1094 if (lock_page_killable(page)) 1103 if (lock_page_killable(page))
1095 goto readpage_eio; 1104 goto readpage_eio;
1096 1105
1106page_not_up_to_date_locked:
1097 /* Did it get truncated before we got the lock? */ 1107 /* Did it get truncated before we got the lock? */
1098 if (!page->mapping) { 1108 if (!page->mapping) {
1099 unlock_page(page); 1109 unlock_page(page);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 98a3f31ccd6..380ab402d71 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/uio.h> 14#include <linux/uio.h>
15#include <linux/rmap.h> 15#include <linux/rmap.h>
16#include <linux/mmu_notifier.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
18#include <asm/io.h> 19#include <asm/io.h>
@@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
188 if (pte) { 189 if (pte) {
189 /* Nuke the page table entry. */ 190 /* Nuke the page table entry. */
190 flush_cache_page(vma, address, pte_pfn(*pte)); 191 flush_cache_page(vma, address, pte_pfn(*pte));
191 pteval = ptep_clear_flush(vma, address, pte); 192 pteval = ptep_clear_flush_notify(vma, address, pte);
192 page_remove_rmap(page, vma); 193 page_remove_rmap(page, vma);
193 dec_mm_counter(mm, file_rss); 194 dec_mm_counter(mm, file_rss);
194 BUG_ON(pte_dirty(pteval)); 195 BUG_ON(pte_dirty(pteval));
diff --git a/mm/fremap.c b/mm/fremap.c
index 07a9c82ce1a..7881638e4a1 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -15,6 +15,7 @@
15#include <linux/rmap.h> 15#include <linux/rmap.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/mmu_notifier.h>
18 19
19#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
20#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
@@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
214 spin_unlock(&mapping->i_mmap_lock); 215 spin_unlock(&mapping->i_mmap_lock);
215 } 216 }
216 217
218 mmu_notifier_invalidate_range_start(mm, start, start + size);
217 err = populate_range(mm, vma, start, size, pgoff); 219 err = populate_range(mm, vma, start, size, pgoff);
220 mmu_notifier_invalidate_range_end(mm, start, start + size);
218 if (!err && !(flags & MAP_NONBLOCK)) { 221 if (!err && !(flags & MAP_NONBLOCK)) {
219 if (unlikely(has_write_lock)) { 222 if (unlikely(has_write_lock)) {
220 downgrade_write(&mm->mmap_sem); 223 downgrade_write(&mm->mmap_sem);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3be79dc18c5..254ce2b9015 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/sysctl.h> 10#include <linux/sysctl.h>
11#include <linux/highmem.h> 11#include <linux/highmem.h>
12#include <linux/mmu_notifier.h>
12#include <linux/nodemask.h> 13#include <linux/nodemask.h>
13#include <linux/pagemap.h> 14#include <linux/pagemap.h>
14#include <linux/mempolicy.h> 15#include <linux/mempolicy.h>
@@ -19,6 +20,7 @@
19 20
20#include <asm/page.h> 21#include <asm/page.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/io.h>
22 24
23#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
24#include "internal.h" 26#include "internal.h"
@@ -1672,6 +1674,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1672 BUG_ON(start & ~huge_page_mask(h)); 1674 BUG_ON(start & ~huge_page_mask(h));
1673 BUG_ON(end & ~huge_page_mask(h)); 1675 BUG_ON(end & ~huge_page_mask(h));
1674 1676
1677 mmu_notifier_invalidate_range_start(mm, start, end);
1675 spin_lock(&mm->page_table_lock); 1678 spin_lock(&mm->page_table_lock);
1676 for (address = start; address < end; address += sz) { 1679 for (address = start; address < end; address += sz) {
1677 ptep = huge_pte_offset(mm, address); 1680 ptep = huge_pte_offset(mm, address);
@@ -1713,6 +1716,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1713 } 1716 }
1714 spin_unlock(&mm->page_table_lock); 1717 spin_unlock(&mm->page_table_lock);
1715 flush_tlb_range(vma, start, end); 1718 flush_tlb_range(vma, start, end);
1719 mmu_notifier_invalidate_range_end(mm, start, end);
1716 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1720 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1717 list_del(&page->lru); 1721 list_del(&page->lru);
1718 put_page(page); 1722 put_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index a8ca04faaea..67f0ab9077d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
51#include <linux/init.h> 51#include <linux/init.h>
52#include <linux/writeback.h> 52#include <linux/writeback.h>
53#include <linux/memcontrol.h> 53#include <linux/memcontrol.h>
54#include <linux/mmu_notifier.h>
54 55
55#include <asm/pgalloc.h> 56#include <asm/pgalloc.h>
56#include <asm/uaccess.h> 57#include <asm/uaccess.h>
@@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
652 unsigned long next; 653 unsigned long next;
653 unsigned long addr = vma->vm_start; 654 unsigned long addr = vma->vm_start;
654 unsigned long end = vma->vm_end; 655 unsigned long end = vma->vm_end;
656 int ret;
655 657
656 /* 658 /*
657 * Don't copy ptes where a page fault will fill them correctly. 659 * Don't copy ptes where a page fault will fill them correctly.
@@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
667 if (is_vm_hugetlb_page(vma)) 669 if (is_vm_hugetlb_page(vma))
668 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 670 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
669 671
672 /*
673 * We need to invalidate the secondary MMU mappings only when
674 * there could be a permission downgrade on the ptes of the
675 * parent mm. And a permission downgrade will only happen if
676 * is_cow_mapping() returns true.
677 */
678 if (is_cow_mapping(vma->vm_flags))
679 mmu_notifier_invalidate_range_start(src_mm, addr, end);
680
681 ret = 0;
670 dst_pgd = pgd_offset(dst_mm, addr); 682 dst_pgd = pgd_offset(dst_mm, addr);
671 src_pgd = pgd_offset(src_mm, addr); 683 src_pgd = pgd_offset(src_mm, addr);
672 do { 684 do {
673 next = pgd_addr_end(addr, end); 685 next = pgd_addr_end(addr, end);
674 if (pgd_none_or_clear_bad(src_pgd)) 686 if (pgd_none_or_clear_bad(src_pgd))
675 continue; 687 continue;
676 if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, 688 if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
677 vma, addr, next)) 689 vma, addr, next))) {
678 return -ENOMEM; 690 ret = -ENOMEM;
691 break;
692 }
679 } while (dst_pgd++, src_pgd++, addr = next, addr != end); 693 } while (dst_pgd++, src_pgd++, addr = next, addr != end);
680 return 0; 694
695 if (is_cow_mapping(vma->vm_flags))
696 mmu_notifier_invalidate_range_end(src_mm,
697 vma->vm_start, end);
698 return ret;
681} 699}
682 700
683static unsigned long zap_pte_range(struct mmu_gather *tlb, 701static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
881 unsigned long start = start_addr; 899 unsigned long start = start_addr;
882 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; 900 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
883 int fullmm = (*tlbp)->fullmm; 901 int fullmm = (*tlbp)->fullmm;
902 struct mm_struct *mm = vma->vm_mm;
884 903
904 mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
885 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { 905 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
886 unsigned long end; 906 unsigned long end;
887 907
@@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
946 } 966 }
947 } 967 }
948out: 968out:
969 mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
949 return start; /* which is now the end (or restart) address */ 970 return start; /* which is now the end (or restart) address */
950} 971}
951 972
@@ -1616,10 +1637,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1616{ 1637{
1617 pgd_t *pgd; 1638 pgd_t *pgd;
1618 unsigned long next; 1639 unsigned long next;
1619 unsigned long end = addr + size; 1640 unsigned long start = addr, end = addr + size;
1620 int err; 1641 int err;
1621 1642
1622 BUG_ON(addr >= end); 1643 BUG_ON(addr >= end);
1644 mmu_notifier_invalidate_range_start(mm, start, end);
1623 pgd = pgd_offset(mm, addr); 1645 pgd = pgd_offset(mm, addr);
1624 do { 1646 do {
1625 next = pgd_addr_end(addr, end); 1647 next = pgd_addr_end(addr, end);
@@ -1627,6 +1649,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1627 if (err) 1649 if (err)
1628 break; 1650 break;
1629 } while (pgd++, addr = next, addr != end); 1651 } while (pgd++, addr = next, addr != end);
1652 mmu_notifier_invalidate_range_end(mm, start, end);
1630 return err; 1653 return err;
1631} 1654}
1632EXPORT_SYMBOL_GPL(apply_to_page_range); 1655EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1839,7 +1862,7 @@ gotten:
1839 * seen in the presence of one thread doing SMC and another 1862 * seen in the presence of one thread doing SMC and another
1840 * thread doing COW. 1863 * thread doing COW.
1841 */ 1864 */
1842 ptep_clear_flush(vma, address, page_table); 1865 ptep_clear_flush_notify(vma, address, page_table);
1843 set_pte_at(mm, address, page_table, entry); 1866 set_pte_at(mm, address, page_table, entry);
1844 update_mmu_cache(vma, address, entry); 1867 update_mmu_cache(vma, address, entry);
1845 lru_cache_add_active(new_page); 1868 lru_cache_add_active(new_page);
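
Every conversion above (fremap, hugetlb, and the memory.c paths) follows the same bracketing discipline, worth stating once in sketch form — the helper below is hypothetical, not a kernel function:

static void my_zap_range(struct mm_struct *mm,
			 unsigned long start, unsigned long end)
{
	mmu_notifier_invalidate_range_start(mm, start, end);
	/*
	 * Secondary MMUs may not establish sptes in [start, end)
	 * until the matching _end call, so the ptes can be cleared
	 * and the pages freed without the device re-mapping them.
	 */
	/* ... clear the ptes and flush the TLB for [start, end) ... */
	mmu_notifier_invalidate_range_end(mm, start, end);
}

Single-pte paths take the cheaper route instead: ptep_clear_flush_notify() from the header above folds the pte flush and an invalidate_page() callback together.
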
diff --git a/mm/mmap.c b/mm/mmap.c
index 5e0cc99e9cd..245c3d69067 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,6 +26,7 @@
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/rmap.h> 28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/cacheflush.h> 32#include <asm/cacheflush.h>
@@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
2061 2062
2062 /* mm's last user has gone, and its about to be pulled down */ 2063 /* mm's last user has gone, and its about to be pulled down */
2063 arch_exit_mmap(mm); 2064 arch_exit_mmap(mm);
2065 mmu_notifier_release(mm);
2064 2066
2065 lru_add_drain(); 2067 lru_add_drain();
2066 flush_cache_mm(mm); 2068 flush_cache_mm(mm);
@@ -2268,3 +2270,161 @@ int install_special_mapping(struct mm_struct *mm,
2268 2270
2269 return 0; 2271 return 0;
2270} 2272}
2273
2274static DEFINE_MUTEX(mm_all_locks_mutex);
2275
2276static void vm_lock_anon_vma(struct anon_vma *anon_vma)
2277{
2278 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2279 /*
2280 * The LSB of head.next can't change from under us
2281 * because we hold the mm_all_locks_mutex.
2282 */
2283 spin_lock(&anon_vma->lock);
2284 /*
2285 * We can safely modify head.next after taking the
2286 * anon_vma->lock. If some other vma in this mm shares
2287 * the same anon_vma we won't take it again.
2288 *
2289 * No need of atomic instructions here, head.next
2290 * can't change from under us thanks to the
2291 * anon_vma->lock.
2292 */
2293 if (__test_and_set_bit(0, (unsigned long *)
2294 &anon_vma->head.next))
2295 BUG();
2296 }
2297}
2298
2299static void vm_lock_mapping(struct address_space *mapping)
2300{
2301 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2302 /*
2303 * AS_MM_ALL_LOCKS can't change from under us because
2304 * we hold the mm_all_locks_mutex.
2305 *
2306 * Operations on ->flags have to be atomic because
2307 * even if AS_MM_ALL_LOCKS is stable thanks to the
2308 * mm_all_locks_mutex, there may be other cpus
2309 * changing other bitflags in parallel to us.
2310 */
2311 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2312 BUG();
2313 spin_lock(&mapping->i_mmap_lock);
2314 }
2315}
2316
2317/*
2318 * This operation locks against the VM for all pte/vma/mm related
2319 * operations that could ever happen on a certain mm. This includes
2320 * vmtruncate, try_to_unmap, and all page faults.
2321 *
2322 * The caller must take the mmap_sem in write mode before calling
2323 * mm_take_all_locks(). The caller isn't allowed to release the
2324 * mmap_sem until mm_drop_all_locks() returns.
2325 *
2326 * mmap_sem in write mode is required in order to block all operations
2327 * that could modify pagetables and free pages without need of
2328 * altering the vma layout (for example populate_range() with
2329 * nonlinear vmas). It's also needed in write mode to prevent new
2330 * anon_vmas from being associated with existing vmas.
2331 *
2332 * A single task can't take more than one mm_take_all_locks() in a row
2333 * or it would deadlock.
2334 *
2335 * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
2336 * mapping->flags avoid taking the same lock twice if more than one
2337 * vma in this mm is backed by the same anon_vma or address_space.
2338 *
2339 * We can take all the locks in random order because the VM code
2340 * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
2341 * takes more than one of them in a row. Secondly we're protected
2342 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
2343 *
2344 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
2345 * that may have to take thousands of locks.
2346 *
2347 * mm_take_all_locks() can fail if it's interrupted by signals.
2348 */
2349int mm_take_all_locks(struct mm_struct *mm)
2350{
2351 struct vm_area_struct *vma;
2352 int ret = -EINTR;
2353
2354 BUG_ON(down_read_trylock(&mm->mmap_sem));
2355
2356 mutex_lock(&mm_all_locks_mutex);
2357
2358 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2359 if (signal_pending(current))
2360 goto out_unlock;
2361 if (vma->anon_vma)
2362 vm_lock_anon_vma(vma->anon_vma);
2363 if (vma->vm_file && vma->vm_file->f_mapping)
2364 vm_lock_mapping(vma->vm_file->f_mapping);
2365 }
2366 ret = 0;
2367
2368out_unlock:
2369 if (ret)
2370 mm_drop_all_locks(mm);
2371
2372 return ret;
2373}
2374
2375static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2376{
2377 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2378 /*
2379 * The LSB of head.next can't change to 0 from under
2380 * us because we hold the mm_all_locks_mutex.
2381 *
2382 * We must however clear the bitflag before unlocking
2383 * the vma so the users using the anon_vma->head will
2384 * never see our bitflag.
2385 *
2386 * No need of atomic instructions here, head.next
2387 * can't change from under us until we release the
2388 * anon_vma->lock.
2389 */
2390 if (!__test_and_clear_bit(0, (unsigned long *)
2391 &anon_vma->head.next))
2392 BUG();
2393 spin_unlock(&anon_vma->lock);
2394 }
2395}
2396
2397static void vm_unlock_mapping(struct address_space *mapping)
2398{
2399 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2400 /*
2401 * AS_MM_ALL_LOCKS can't change to 0 from under us
2402 * because we hold the mm_all_locks_mutex.
2403 */
2404 spin_unlock(&mapping->i_mmap_lock);
2405 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2406 &mapping->flags))
2407 BUG();
2408 }
2409}
2410
2411/*
2412 * The mmap_sem cannot be released by the caller until
2413 * mm_drop_all_locks() returns.
2414 */
2415void mm_drop_all_locks(struct mm_struct *mm)
2416{
2417 struct vm_area_struct *vma;
2418
2419 BUG_ON(down_read_trylock(&mm->mmap_sem));
2420 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2421
2422 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2423 if (vma->anon_vma)
2424 vm_unlock_anon_vma(vma->anon_vma);
2425 if (vma->vm_file && vma->vm_file->f_mapping)
2426 vm_unlock_mapping(vma->vm_file->f_mapping);
2427 }
2428
2429 mutex_unlock(&mm_all_locks_mutex);
2430}
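
The calling convention documented above mm_take_all_locks() boils down to one fixed sequence, sketched here with a hypothetical function name (do_mmu_notifier_register() in mm/mmu_notifier.c below is the real user):

static int my_freeze_mm(struct mm_struct *mm)
{
	int ret;

	down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);	/* -EINTR on a pending signal */
	if (ret)
		goto out;

	/* vma layout and pagetables are now stable against all of the VM */

	mm_drop_all_locks(mm);
out:
	up_write(&mm->mmap_sem);	/* only after mm_drop_all_locks() */
	return ret;
}
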
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
new file mode 100644
index 00000000000..5f4ef0250be
--- /dev/null
+++ b/mm/mmu_notifier.c
@@ -0,0 +1,277 @@
1/*
2 * linux/mm/mmu_notifier.c
3 *
4 * Copyright (C) 2008 Qumranet, Inc.
5 * Copyright (C) 2008 SGI
6 * Christoph Lameter <clameter@sgi.com>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2. See
9 * the COPYING file in the top-level directory.
10 */
11
12#include <linux/rculist.h>
13#include <linux/mmu_notifier.h>
14#include <linux/module.h>
15#include <linux/mm.h>
16#include <linux/err.h>
17#include <linux/rcupdate.h>
18#include <linux/sched.h>
19
20/*
21 * This function can't run concurrently against mmu_notifier_register
22 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
23 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
24 * in parallel despite there being no task using this mm any more,
25 * through the vmas outside of the exit_mmap context, such as with
26 * vmtruncate. This serializes against mmu_notifier_unregister with
27 * the mmu_notifier_mm->lock in addition to RCU and it serializes
28 * against the other mmu notifiers with RCU. struct mmu_notifier_mm
29 * can't go away from under us as exit_mmap holds an mm_count pin
30 * itself.
31 */
32void __mmu_notifier_release(struct mm_struct *mm)
33{
34 struct mmu_notifier *mn;
35
36 spin_lock(&mm->mmu_notifier_mm->lock);
37 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
38 mn = hlist_entry(mm->mmu_notifier_mm->list.first,
39 struct mmu_notifier,
40 hlist);
41 /*
42 * We arrived before mmu_notifier_unregister so
 43 * mmu_notifier_unregister will do nothing other than
 44 * wait for ->release to finish before
 45 * mmu_notifier_unregister returns.
46 */
47 hlist_del_init_rcu(&mn->hlist);
48 /*
49 * RCU here will block mmu_notifier_unregister until
50 * ->release returns.
51 */
52 rcu_read_lock();
53 spin_unlock(&mm->mmu_notifier_mm->lock);
54 /*
 55 * If ->release runs before mmu_notifier_unregister, it
56 * must be handled as it's the only way for the driver
57 * to flush all existing sptes and stop the driver
58 * from establishing any more sptes before all the
59 * pages in the mm are freed.
60 */
61 if (mn->ops->release)
62 mn->ops->release(mn, mm);
63 rcu_read_unlock();
64 spin_lock(&mm->mmu_notifier_mm->lock);
65 }
66 spin_unlock(&mm->mmu_notifier_mm->lock);
67
68 /*
 69 * synchronize_rcu here prevents mmu_notifier_release from
 70 * returning to exit_mmap (which would proceed to free all pages
71 * in the mm) until the ->release method returns, if it was
72 * invoked by mmu_notifier_unregister.
73 *
74 * The mmu_notifier_mm can't go away from under us because one
 75 * mm_count pin is held by exit_mmap.
76 */
77 synchronize_rcu();
78}
79
80/*
81 * If no young bitflag is supported by the hardware, ->clear_flush_young can
 82 * unmap the address and return 1 or 0 depending on whether the
 83 * mapping previously existed.
84 */
85int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
86 unsigned long address)
87{
88 struct mmu_notifier *mn;
89 struct hlist_node *n;
90 int young = 0;
91
92 rcu_read_lock();
93 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
94 if (mn->ops->clear_flush_young)
95 young |= mn->ops->clear_flush_young(mn, mm, address);
96 }
97 rcu_read_unlock();
98
99 return young;
100}
101
102void __mmu_notifier_invalidate_page(struct mm_struct *mm,
103 unsigned long address)
104{
105 struct mmu_notifier *mn;
106 struct hlist_node *n;
107
108 rcu_read_lock();
109 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
110 if (mn->ops->invalidate_page)
111 mn->ops->invalidate_page(mn, mm, address);
112 }
113 rcu_read_unlock();
114}
115
116void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
117 unsigned long start, unsigned long end)
118{
119 struct mmu_notifier *mn;
120 struct hlist_node *n;
121
122 rcu_read_lock();
123 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
124 if (mn->ops->invalidate_range_start)
125 mn->ops->invalidate_range_start(mn, mm, start, end);
126 }
127 rcu_read_unlock();
128}
129
130void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
131 unsigned long start, unsigned long end)
132{
133 struct mmu_notifier *mn;
134 struct hlist_node *n;
135
136 rcu_read_lock();
137 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
138 if (mn->ops->invalidate_range_end)
139 mn->ops->invalidate_range_end(mn, mm, start, end);
140 }
141 rcu_read_unlock();
142}
143
144static int do_mmu_notifier_register(struct mmu_notifier *mn,
145 struct mm_struct *mm,
146 int take_mmap_sem)
147{
148 struct mmu_notifier_mm *mmu_notifier_mm;
149 int ret;
150
151 BUG_ON(atomic_read(&mm->mm_users) <= 0);
152
153 ret = -ENOMEM;
154 mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
155 if (unlikely(!mmu_notifier_mm))
156 goto out;
157
158 if (take_mmap_sem)
159 down_write(&mm->mmap_sem);
160 ret = mm_take_all_locks(mm);
161 if (unlikely(ret))
162 goto out_cleanup;
163
164 if (!mm_has_notifiers(mm)) {
165 INIT_HLIST_HEAD(&mmu_notifier_mm->list);
166 spin_lock_init(&mmu_notifier_mm->lock);
167 mm->mmu_notifier_mm = mmu_notifier_mm;
168 mmu_notifier_mm = NULL;
169 }
170 atomic_inc(&mm->mm_count);
171
172 /*
173 * Serialize the update against mmu_notifier_unregister. A
174 * side note: mmu_notifier_release can't run concurrently with
175 * us because we hold the mm_users pin (either implicitly as
176 * current->mm or explicitly with get_task_mm() or similar).
177 * We can't race against any other mmu notifier method either
178 * thanks to mm_take_all_locks().
179 */
180 spin_lock(&mm->mmu_notifier_mm->lock);
181 hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
182 spin_unlock(&mm->mmu_notifier_mm->lock);
183
184 mm_drop_all_locks(mm);
185out_cleanup:
186 if (take_mmap_sem)
187 up_write(&mm->mmap_sem);
188 /* kfree() does nothing if mmu_notifier_mm is NULL */
189 kfree(mmu_notifier_mm);
190out:
191 BUG_ON(atomic_read(&mm->mm_users) <= 0);
192 return ret;
193}
194
195/*
196 * Must not hold mmap_sem nor any other VM related lock when calling
197 * this registration function. Must also ensure mm_users can't go down
198 * to zero while this runs to avoid races with mmu_notifier_release,
199 * so mm has to be current->mm or the mm should be pinned safely such
200 * as with get_task_mm(). If the mm is not current->mm, the mm_users
201 * pin should be released by calling mmput after mmu_notifier_register
202 * returns. mmu_notifier_unregister must always be called to
203 * unregister the notifier. mm_count is automatically pinned to allow
204 * mmu_notifier_unregister to safely run at any time later, before or
205 * after exit_mmap. ->release will always be called before exit_mmap
206 * frees the pages.
207 */
208int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
209{
210 return do_mmu_notifier_register(mn, mm, 1);
211}
212EXPORT_SYMBOL_GPL(mmu_notifier_register);
213
214/*
215 * Same as mmu_notifier_register but here the caller must hold the
216 * mmap_sem in write mode.
217 */
218int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
219{
220 return do_mmu_notifier_register(mn, mm, 0);
221}
222EXPORT_SYMBOL_GPL(__mmu_notifier_register);
223
224/* this is called after the last mmu_notifier_unregister() returned */
225void __mmu_notifier_mm_destroy(struct mm_struct *mm)
226{
227 BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
228 kfree(mm->mmu_notifier_mm);
229 mm->mmu_notifier_mm = LIST_POISON1; /* debug */
230}
231
232/*
233 * This releases the mm_count pin automatically and frees the mm
234 * structure if it was the last user of it. It serializes against
235 * running mmu notifiers with RCU and against mmu_notifier_unregister
236 * with the unregister lock + RCU. All sptes must be dropped before
237 * calling mmu_notifier_unregister. ->release or any other notifier
238 * method may be invoked concurrently with mmu_notifier_unregister,
239 * and only after mmu_notifier_unregister has returned are we guaranteed
240 * that ->release or any other method can't run anymore.
241 */
242void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
243{
244 BUG_ON(atomic_read(&mm->mm_count) <= 0);
245
246 spin_lock(&mm->mmu_notifier_mm->lock);
247 if (!hlist_unhashed(&mn->hlist)) {
248 hlist_del_rcu(&mn->hlist);
249
250 /*
251 * RCU here will force exit_mmap to wait for ->release to finish
252 * before freeing the pages.
253 */
254 rcu_read_lock();
255 spin_unlock(&mm->mmu_notifier_mm->lock);
256 /*
257 * exit_mmap will block in mmu_notifier_release to
258 * guarantee ->release is called before freeing the
259 * pages.
260 */
261 if (mn->ops->release)
262 mn->ops->release(mn, mm);
263 rcu_read_unlock();
264 } else
265 spin_unlock(&mm->mmu_notifier_mm->lock);
266
267 /*
268 * Wait for any running method to finish, including
269 * ->release if it was run by mmu_notifier_release instead of us.
270 */
271 synchronize_rcu();
272
273 BUG_ON(atomic_read(&mm->mm_count) <= 0);
274
275 mmdrop(mm);
276}
277EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
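
For a notifier attached to another task's address space, the rules in the comment above mmu_notifier_register() translate into the following lifecycle sketch (hypothetical driver code; struct my_smmu embeds a struct mmu_notifier as in the header example earlier):

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

struct my_smmu {
	struct mmu_notifier mn;		/* .ops filled in elsewhere */
	struct mm_struct *mm;
};

static int my_watch_task(struct my_smmu *smmu, struct task_struct *tsk)
{
	struct mm_struct *mm = get_task_mm(tsk);	/* mm_users pin */
	int ret;

	if (!mm)
		return -ESRCH;
	ret = mmu_notifier_register(&smmu->mn, mm);
	/* registration took its own mm_count pin; drop the mm_users pin */
	mmput(mm);
	if (!ret)
		smmu->mm = mm;
	return ret;
}

static void my_unwatch(struct my_smmu *smmu)
{
	/* legal before or after exit_mmap; releases the mm_count pin */
	mmu_notifier_unregister(&smmu->mn, smmu->mm);
}
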
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abd645a3b0a..fded06f923f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -21,6 +21,7 @@
21#include <linux/syscalls.h> 21#include <linux/syscalls.h>
22#include <linux/swap.h> 22#include <linux/swap.h>
23#include <linux/swapops.h> 23#include <linux/swapops.h>
24#include <linux/mmu_notifier.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include <asm/pgtable.h> 26#include <asm/pgtable.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
@@ -203,10 +204,12 @@ success:
203 dirty_accountable = 1; 204 dirty_accountable = 1;
204 } 205 }
205 206
207 mmu_notifier_invalidate_range_start(mm, start, end);
206 if (is_vm_hugetlb_page(vma)) 208 if (is_vm_hugetlb_page(vma))
207 hugetlb_change_protection(vma, start, end, vma->vm_page_prot); 209 hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
208 else 210 else
209 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); 211 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
212 mmu_notifier_invalidate_range_end(mm, start, end);
210 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); 213 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
211 vm_stat_account(mm, newflags, vma->vm_file, nrpages); 214 vm_stat_account(mm, newflags, vma->vm_file, nrpages);
212 return 0; 215 return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 08e3c7f2bd1..1a7743923c8 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -18,6 +18,7 @@
18#include <linux/highmem.h> 18#include <linux/highmem.h>
19#include <linux/security.h> 19#include <linux/security.h>
20#include <linux/syscalls.h> 20#include <linux/syscalls.h>
21#include <linux/mmu_notifier.h>
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23#include <asm/cacheflush.h> 24#include <asm/cacheflush.h>
@@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
74 struct mm_struct *mm = vma->vm_mm; 75 struct mm_struct *mm = vma->vm_mm;
75 pte_t *old_pte, *new_pte, pte; 76 pte_t *old_pte, *new_pte, pte;
76 spinlock_t *old_ptl, *new_ptl; 77 spinlock_t *old_ptl, *new_ptl;
78 unsigned long old_start;
77 79
80 old_start = old_addr;
81 mmu_notifier_invalidate_range_start(vma->vm_mm,
82 old_start, old_end);
78 if (vma->vm_file) { 83 if (vma->vm_file) {
79 /* 84 /*
80 * Subtle point from Rajesh Venkatasubramanian: before 85 * Subtle point from Rajesh Venkatasubramanian: before
@@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
116 pte_unmap_unlock(old_pte - 1, old_ptl); 121 pte_unmap_unlock(old_pte - 1, old_ptl);
117 if (mapping) 122 if (mapping)
118 spin_unlock(&mapping->i_mmap_lock); 123 spin_unlock(&mapping->i_mmap_lock);
124 mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
119} 125}
120 126
121#define LATENCY_LIMIT (64 * PAGE_SIZE) 127#define LATENCY_LIMIT (64 * PAGE_SIZE)
diff --git a/mm/rmap.c b/mm/rmap.c
index 39ae5a9bf38..99bc3f9cd79 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kallsyms.h> 50#include <linux/kallsyms.h>
51#include <linux/memcontrol.h> 51#include <linux/memcontrol.h>
52#include <linux/mmu_notifier.h>
52 53
53#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
54 55
@@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page,
287 if (vma->vm_flags & VM_LOCKED) { 288 if (vma->vm_flags & VM_LOCKED) {
288 referenced++; 289 referenced++;
289 *mapcount = 1; /* break early from loop */ 290 *mapcount = 1; /* break early from loop */
290 } else if (ptep_clear_flush_young(vma, address, pte)) 291 } else if (ptep_clear_flush_young_notify(vma, address, pte))
291 referenced++; 292 referenced++;
292 293
293 /* Pretend the page is referenced if the task has the 294 /* Pretend the page is referenced if the task has the
@@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
457 pte_t entry; 458 pte_t entry;
458 459
459 flush_cache_page(vma, address, pte_pfn(*pte)); 460 flush_cache_page(vma, address, pte_pfn(*pte));
460 entry = ptep_clear_flush(vma, address, pte); 461 entry = ptep_clear_flush_notify(vma, address, pte);
461 entry = pte_wrprotect(entry); 462 entry = pte_wrprotect(entry);
462 entry = pte_mkclean(entry); 463 entry = pte_mkclean(entry);
463 set_pte_at(mm, address, pte, entry); 464 set_pte_at(mm, address, pte, entry);
@@ -705,14 +706,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
705 * skipped over this mm) then we should reactivate it. 706 * skipped over this mm) then we should reactivate it.
706 */ 707 */
707 if (!migration && ((vma->vm_flags & VM_LOCKED) || 708 if (!migration && ((vma->vm_flags & VM_LOCKED) ||
708 (ptep_clear_flush_young(vma, address, pte)))) { 709 (ptep_clear_flush_young_notify(vma, address, pte)))) {
709 ret = SWAP_FAIL; 710 ret = SWAP_FAIL;
710 goto out_unmap; 711 goto out_unmap;
711 } 712 }
712 713
713 /* Nuke the page table entry. */ 714 /* Nuke the page table entry. */
714 flush_cache_page(vma, address, page_to_pfn(page)); 715 flush_cache_page(vma, address, page_to_pfn(page));
715 pteval = ptep_clear_flush(vma, address, pte); 716 pteval = ptep_clear_flush_notify(vma, address, pte);
716 717
717 /* Move the dirty bit to the physical page now the pte is gone. */ 718 /* Move the dirty bit to the physical page now the pte is gone. */
718 if (pte_dirty(pteval)) 719 if (pte_dirty(pteval))
@@ -837,12 +838,12 @@ static void try_to_unmap_cluster(unsigned long cursor,
837 page = vm_normal_page(vma, address, *pte); 838 page = vm_normal_page(vma, address, *pte);
838 BUG_ON(!page || PageAnon(page)); 839 BUG_ON(!page || PageAnon(page));
839 840
840 if (ptep_clear_flush_young(vma, address, pte)) 841 if (ptep_clear_flush_young_notify(vma, address, pte))
841 continue; 842 continue;
842 843
843 /* Nuke the page table entry. */ 844 /* Nuke the page table entry. */
844 flush_cache_page(vma, address, pte_pfn(*pte)); 845 flush_cache_page(vma, address, pte_pfn(*pte));
845 pteval = ptep_clear_flush(vma, address, pte); 846 pteval = ptep_clear_flush_notify(vma, address, pte);
846 847
847 /* If nonlinear, store the file page offset in the pte. */ 848 /* If nonlinear, store the file page offset in the pte. */
848 if (page->index != linear_page_index(vma, address)) 849 if (page->index != linear_page_index(vma, address))
diff --git a/mm/shmem.c b/mm/shmem.c
index 952d361774b..c1e5a3b4f75 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1513,7 +1513,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1513 inode->i_uid = current->fsuid; 1513 inode->i_uid = current->fsuid;
1514 inode->i_gid = current->fsgid; 1514 inode->i_gid = current->fsgid;
1515 inode->i_blocks = 0; 1515 inode->i_blocks = 0;
1516 inode->i_mapping->a_ops = &shmem_aops;
1517 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; 1516 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1518 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1517 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1519 inode->i_generation = get_seconds(); 1518 inode->i_generation = get_seconds();
@@ -1528,6 +1527,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1528 init_special_inode(inode, mode, dev); 1527 init_special_inode(inode, mode, dev);
1529 break; 1528 break;
1530 case S_IFREG: 1529 case S_IFREG:
1530 inode->i_mapping->a_ops = &shmem_aops;
1531 inode->i_op = &shmem_inode_operations; 1531 inode->i_op = &shmem_inode_operations;
1532 inode->i_fop = &shmem_file_operations; 1532 inode->i_fop = &shmem_file_operations;
1533 mpol_shared_policy_init(&info->policy, 1533 mpol_shared_policy_init(&info->policy,
@@ -1929,6 +1929,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1929 return error; 1929 return error;
1930 } 1930 }
1931 unlock_page(page); 1931 unlock_page(page);
1932 inode->i_mapping->a_ops = &shmem_aops;
1932 inode->i_op = &shmem_symlink_inode_operations; 1933 inode->i_op = &shmem_symlink_inode_operations;
1933 kaddr = kmap_atomic(page, KM_USER0); 1934 kaddr = kmap_atomic(page, KM_USER0);
1934 memcpy(kaddr, symname, len); 1935 memcpy(kaddr, symname, len);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 835d2741308..5a32cb7c4bb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -310,8 +310,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
310 switch (m->mode) { 310 switch (m->mode) {
311 case SVC_POOL_PERCPU: 311 case SVC_POOL_PERCPU:
312 { 312 {
313 cpumask_of_cpu_ptr(cpumask, node); 313 set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
314 set_cpus_allowed_ptr(task, cpumask);
315 break; 314 break;
316 } 315 }
317 case SVC_POOL_PERNODE: 316 case SVC_POOL_PERNODE: