5 files changed, 160 insertions, 26 deletions
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck
new file mode 100644
index 000000000000..068a6d9904b9
--- /dev/null
+++ b/Documentation/x86_64/machinecheck
@@ -0,0 +1,70 @@
+Configurable sysfs parameters for the x86-64 machine check code.
+Machine checks report internal hardware error conditions detected
+by the CPU. Uncorrected errors typically cause a machine check
+(often with panic), corrected ones cause a machine check log entry.
+Machine checks are organized in banks (normally associated with
+a hardware subsystem) and subevents in a bank. The exact meaning
+of the banks and subevents is CPU specific.
+mcelog knows how to decode them.
+When you see the "Machine check errors logged" message in the system
+log then mcelog should run to collect and decode machine check entries
+from /dev/mcelog. Normally mcelog should be run regularly from a cronjob.
+Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN
+(N = CPU number)
+The directory contains some configurable entries:
+Entries:
+bankNctl
+(N bank number)
+        64bit Hex bitmask enabling/disabling specific subevents for bank N
+        When a bit in the bitmask is zero then the respective
+        subevent will not be reported.
+        By default all events are enabled.
+        Note that the BIOS maintains another mask to disable specific events
+        per bank.  This is not visible here.
+The following entries appear for each CPU, but they are truly shared
+between all CPUs.
+check_interval
+        How often to poll for corrected machine check errors, in seconds
+        (Note output is hexadecimal). Default 5 minutes.
+tolerant
+        Tolerance level. When a machine check exception occurs for a non
+        corrected machine check the kernel can take different actions.
+        Since machine check exceptions can happen any time it is sometimes
+        risky for the kernel to kill a process because it defies
+        normal kernel locking rules. The tolerance level configures
+        how hard the kernel tries to recover even at some risk of deadlock.
+        0: always panic,
+        1: panic if deadlock possible,
+        2: try to avoid panic,
+        3: never panic or exit (for testing only)
+        Default: 1
+        Note this only makes a difference if the CPU allows recovery
+        from a machine check exception. Current x86 CPUs generally do not.
+trigger
+        Program to run when a machine check event is detected.
+        This is an alternative to running mcelog regularly from cron
+        and allows events to be detected faster.
+TBD document entries for AMD threshold interrupt configuration
+For more details about the x86 machine check architecture
+see the Intel and AMD architecture manuals from their developer websites.
+For more details about the architecture
+see http://one.firstfloor.org/~andi/mce.pdf
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bdb54a2c9f18..8011a8e1c7d4 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -19,6 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/percpu.h>
 #include <linux/ctype.h>
+#include <linux/kmod.h>
 #include <asm/processor.h> 
 #include <asm/msr.h>
 #include <asm/mce.h>
@@ -42,6 +43,10 @@ static unsigned long console_logged;
 static int notify_user;
 static int rip_msr;
 static int mce_bootlog = 1;
+static atomic_t mce_events;
+static char trigger[128];
+static char *trigger_argv[2] = { trigger, NULL };
 /*
 * Lockless MCE logging infrastructure.
@@ -57,6 +62,7 @@ struct mce_log mcelog = {
 void mce_log(struct mce *mce)
 {
        unsigned next, entry;
+        atomic_inc(&mce_events);
        mce->finished = 0;
        wmb();
        for (;;) {
@@ -161,6 +167,17 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
        }
 }
+/*
+ * Run the user-configured trigger program if machine check events have
+ * been logged since the last time it was launched.
+ *
+ * mce_events is incremented by mce_log() for every logged event; the
+ * function-local mce_logged remembers the count seen at the previous
+ * launch.  Nothing happens when the two counts match or when the trigger
+ * string is empty.
+ */
+static void do_mce_trigger(void)
+{
+        static atomic_t mce_logged;
+        int events = atomic_read(&mce_events);
+        if (events != atomic_read(&mce_logged) && trigger[0]) {
+                /* Small race window, but should be harmless.  */
+                atomic_set(&mce_logged, events);
+                /* The final -1 is presumably the "don't wait at all" mode
+                   documented for call_usermodehelper_keys(); the return
+                   value is not checked here. */
+                call_usermodehelper(trigger, trigger_argv, NULL, -1);
+        }
+}
 /* 
 * The actual machine check handler
 */
@@ -234,8 +251,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
        }
        /* Never do anything final in the polling timer */
-        if (!regs)
+        if (!regs) {
+                /* Normal interrupt context here. Call trigger for any new
+                   events. */
+                do_mce_trigger();
                goto out;
+        }
        /* If we didn't find an uncorrectable error, pick
           the last one (shouldn't happen, just being safe). */
@@ -606,17 +627,42 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
        }                                                                          \
        static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+/* TBD should generate these dynamically based on number of available banks */
 ACCESSOR(bank0ctl,bank[0],mce_restart())
 ACCESSOR(bank1ctl,bank[1],mce_restart())
 ACCESSOR(bank2ctl,bank[2],mce_restart())
 ACCESSOR(bank3ctl,bank[3],mce_restart())
 ACCESSOR(bank4ctl,bank[4],mce_restart())
 ACCESSOR(bank5ctl,bank[5],mce_restart())
-static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
-        &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
+/* sysfs show handler for the "trigger" attribute: copies the trigger
+   string into buf, appends a newline, and returns the number of
+   characters written (string length plus the newline). */
+static ssize_t show_trigger(struct sys_device *s, char *buf)
-        &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
+{
+        strcpy(buf, trigger);
+        strcat(buf, "\n");
+        return strlen(trigger) + 1;
+}
+/*
+ * sysfs store handler for the "trigger" attribute.
+ *
+ * Copies the written string into the trigger buffer, truncating it to fit
+ * and always NUL-terminating it, then cuts the stored string at the first
+ * newline so that "echo /path/to/prog > trigger" stores just the path.
+ *
+ * Returns the length of the stored string as measured before the newline
+ * was stripped.
+ */
+static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
+{
+        char *p;
+        int len;
+        strncpy(trigger, buf, sizeof(trigger));
+        /* strncpy leaves the buffer unterminated when the input fills it. */
+        trigger[sizeof(trigger)-1] = 0;
+        len = strlen(trigger);
+        p = strchr(trigger, '\n');
+        /* strchr returns NULL when there is no newline, so test the
+           pointer itself rather than the character it points to. */
+        if (p)
+                *p = 0;
+        return len;
+}
+static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 ACCESSOR(tolerant,tolerant,)
 ACCESSOR(check_interval,check_interval,mce_restart())
+/* Attributes created for every per-CPU machine check device.  The list is
+   NULL-terminated: mce_create_device() and mce_remove_device() walk it
+   until they reach the NULL entry. */
+static struct sysdev_attribute *mce_attributes[] = {
+        &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
+        &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
+        &attr_tolerant, &attr_check_interval, &attr_trigger,
+        NULL
+};
 /* Per cpu sysdev init.  All of the cpus still share the same ctl bank */
 static __cpuinit int mce_create_device(unsigned int cpu)
@@ -632,11 +678,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
        err = sysdev_register(&per_cpu(device_mce,cpu));
        if (!err) {
-                for (i = 0; i < banks; i++)
+                for (i = 0; mce_attributes[i]; i++)
                        sysdev_create_file(&per_cpu(device_mce,cpu),
-                                bank_attributes[i]);
+                                mce_attributes[i]);
-                sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
-                sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
        }
        return err;
 }
@@ -645,11 +689,9 @@ static void mce_remove_device(unsigned int cpu)
 {
        int i;
-        for (i = 0; i < banks; i++)
+        for (i = 0; mce_attributes[i]; i++)
                sysdev_remove_file(&per_cpu(device_mce,cpu),
-                        bank_attributes[i]);
+                        mce_attributes[i]);
-        sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
-        sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
        sysdev_unregister(&per_cpu(device_mce,cpu));
        memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
 }
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index cd8dbe57b33a..d0bd5d66e103 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -220,6 +220,10 @@ asmlinkage void mce_threshold_interrupt(void)
                             (high & MASK_LOCKED_HI))
                                continue;
+                        /* Log the machine check that caused the threshold
+                           event. */
+                        do_machine_check(NULL, 0);
                        if (high & MASK_OVERFLOW_HI) {
                                rdmsrl(address, m.misc);
                                rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 5a11146d6d9c..177e92b4019b 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -103,6 +103,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
 extern atomic_t mce_entry;
+extern void do_machine_check(struct pt_regs *, long);
 #endif
 #endif
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 3a7379aa31ca..796276141e51 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -217,7 +217,10 @@ static int wait_for_helper(void *data)
                        sub_info->retval = ret;
        }
-        complete(sub_info->complete);
+        if (sub_info->wait < 0)
+                kfree(sub_info);
+        else
+                complete(sub_info->complete);
        return 0;
 }
@@ -239,6 +242,9 @@ static void __call_usermodehelper(struct work_struct *work)
                pid = kernel_thread(____call_usermodehelper, sub_info,
                                    CLONE_VFORK | SIGCHLD);
+        if (wait < 0)
+                return;
        if (pid < 0) {
                sub_info->retval = pid;
                complete(sub_info->complete);
@@ -253,6 +259,9 @@ static void __call_usermodehelper(struct work_struct *work)
 * @envp: null-terminated environment list
 * @session_keyring: session keyring for process (NULL for an empty keyring)
 * @wait: wait for the application to finish and return status.
+ *        when -1 don't wait at all, but you get no useful error back when
+ *        the program couldn't be exec'ed. This makes it safe to call
+ *        from interrupt context.
 *
 * Runs a user-space application.  The application is started
 * asynchronously if wait is not set, and runs as a child of keventd.
@@ -265,17 +274,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
                             struct key *session_keyring, int wait)
 {
        DECLARE_COMPLETION_ONSTACK(done);
-        struct subprocess_info sub_info = {
+        struct subprocess_info *sub_info;
-                .work           = __WORK_INITIALIZER(sub_info.work,
+        int retval;
-                                                     __call_usermodehelper),
-                .complete       = &done,
-                .path           = path,
-                .argv           = argv,
-                .envp           = envp,
-                .ring           = session_keyring,
-                .wait           = wait,
-                .retval         = 0,
-        };
        if (!khelper_wq)
                return -EBUSY;
@@ -283,9 +283,25 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
        if (path[0] == '\0')
                return 0;
-        queue_work(khelper_wq, &sub_info.work);
+        sub_info = kzalloc(sizeof(struct subprocess_info),  GFP_ATOMIC);
+        if (!sub_info)
+                return -ENOMEM;
+        INIT_WORK(&sub_info->work, __call_usermodehelper);
+        sub_info->complete = &done;
+        sub_info->path = path;
+        sub_info->argv = argv;
+        sub_info->envp = envp;
+        sub_info->ring = session_keyring;
+        sub_info->wait = wait;
+        queue_work(khelper_wq, &sub_info->work);
+        if (wait < 0) /* task has freed sub_info */
+                return 0;
        wait_for_completion(&done);
-        return sub_info.retval;
+        retval = sub_info->retval;
+        kfree(sub_info);
+        return retval;
 }
 EXPORT_SYMBOL(call_usermodehelper_keys);

diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck new file mode 100644 index 000000000000..068a6d9904b9 --- /dev/null +++ b/Documentation/x86_64/machinecheck
@@ -0,0 +1,70 @@
		1
		2	Configurable sysfs parameters for the x86-64 machine check code.
		3
		4	Machine checks report internal hardware error conditions detected
		5	by the CPU. Uncorrected errors typically cause a machine check
		6	(often with panic), corrected ones cause a machine check log entry.
		7
		8	Machine checks are organized in banks (normally associated with
		9	a hardware subsystem) and subevents in a bank. The exact meaning
		10	of the banks and subevents is CPU specific.
		11
		12	mcelog knows how to decode them.
		13
		14	When you see the "Machine check errors logged" message in the system
		15	log then mcelog should run to collect and decode machine check entries
		16	from /dev/mcelog. Normally mcelog should be run regularly from a cronjob.
		17
		18	Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN
		19	(N = CPU number)
		20
		21	The directory contains some configurable entries:
		22
		23	Entries:
		24
		25	bankNctl
		26	(N bank number)
		27	64bit Hex bitmask enabling/disabling specific subevents for bank N
		28	When a bit in the bitmask is zero then the respective
		29	subevent will not be reported.
		30	By default all events are enabled.
		31	Note that the BIOS maintains another mask to disable specific events
		32	per bank. This is not visible here.
		33
		34	The following entries appear for each CPU, but they are truly shared
		35	between all CPUs.
		36
		37	check_interval
		38	How often to poll for corrected machine check errors, in seconds
		39	(Note output is hexadecimal). Default 5 minutes.
		40
		41	tolerant
		42	Tolerance level. When a machine check exception occurs for a non
		43	corrected machine check the kernel can take different actions.
		44	Since machine check exceptions can happen any time it is sometimes
		45	risky for the kernel to kill a process because it defies
		46	normal kernel locking rules. The tolerance level configures
		47	how hard the kernel tries to recover even at some risk of deadlock.
		48
		49	0: always panic,
		50	1: panic if deadlock possible,
		51	2: try to avoid panic,
		52	3: never panic or exit (for testing only)
		53
		54	Default: 1
		55
		56	Note this only makes a difference if the CPU allows recovery
		57	from a machine check exception. Current x86 CPUs generally do not.
		58
		59	trigger
		60	Program to run when a machine check event is detected.
		61	This is an alternative to running mcelog regularly from cron
		62	and allows events to be detected faster.
		63
		64	TBD document entries for AMD threshold interrupt configuration
		65
		66	For more details about the x86 machine check architecture
		67	see the Intel and AMD architecture manuals from their developer websites.
		68
		69	For more details about the architecture
		70	see http://one.firstfloor.org/~andi/mce.pdf


diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bdb54a2c9f18..8011a8e1c7d4 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c
@@ -19,6 +19,7 @@
19	#include <linux/cpu.h>	19	#include <linux/cpu.h>
20	#include <linux/percpu.h>	20	#include <linux/percpu.h>
21	#include <linux/ctype.h>	21	#include <linux/ctype.h>
		22	#include <linux/kmod.h>
22	#include <asm/processor.h>	23	#include <asm/processor.h>
23	#include <asm/msr.h>	24	#include <asm/msr.h>
24	#include <asm/mce.h>	25	#include <asm/mce.h>
@@ -42,6 +43,10 @@ static unsigned long console_logged;
42	static int notify_user;	43	static int notify_user;
43	static int rip_msr;	44	static int rip_msr;
44	static int mce_bootlog = 1;	45	static int mce_bootlog = 1;
		46	static atomic_t mce_events;
		47
		48	static char trigger[128];
		49	static char *trigger_argv[2] = { trigger, NULL };
45		50
46	/*	51	/*
47	* Lockless MCE logging infrastructure.	52	* Lockless MCE logging infrastructure.
@@ -57,6 +62,7 @@ struct mce_log mcelog = {
57	void mce_log(struct mce *mce)	62	void mce_log(struct mce *mce)
58	{	63	{
59	unsigned next, entry;	64	unsigned next, entry;
		65	atomic_inc(&mce_events);
60	mce->finished = 0;	66	mce->finished = 0;
61	wmb();	67	wmb();
62	for (;;) {	68	for (;;) {
@@ -161,6 +167,17 @@ static inline void mce_get_rip(struct mce m, struct pt_regs regs)
161	}	167	}
162	}	168	}
163		169
		170	static void do_mce_trigger(void)
		171	{
		172	static atomic_t mce_logged;
		173	int events = atomic_read(&mce_events);
		174	if (events != atomic_read(&mce_logged) && trigger[0]) {
		175	/* Small race window, but should be harmless. */
		176	atomic_set(&mce_logged, events);
		177	call_usermodehelper(trigger, trigger_argv, NULL, -1);
		178	}
		179	}
		180
164	/*	181	/*
165	* The actual machine check handler	182	* The actual machine check handler
166	*/	183	*/
@@ -234,8 +251,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
234	}	251	}
235		252
236	/* Never do anything final in the polling timer */	253	/* Never do anything final in the polling timer */
237	if (!regs)	254	if (!regs) {
		255	/* Normal interrupt context here. Call trigger for any new
		256	events. */
		257	do_mce_trigger();
238	goto out;	258	goto out;
		259	}
239		260
240	/* If we didn't find an uncorrectable error, pick	261	/* If we didn't find an uncorrectable error, pick
241	the last one (shouldn't happen, just being safe). */	262	the last one (shouldn't happen, just being safe). */
@@ -606,17 +627,42 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
606	} \	627	} \
607	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);	628	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
608		629
		630	/* TBD should generate these dynamically based on number of available banks */
609	ACCESSOR(bank0ctl,bank[0],mce_restart())	631	ACCESSOR(bank0ctl,bank[0],mce_restart())
610	ACCESSOR(bank1ctl,bank[1],mce_restart())	632	ACCESSOR(bank1ctl,bank[1],mce_restart())
611	ACCESSOR(bank2ctl,bank[2],mce_restart())	633	ACCESSOR(bank2ctl,bank[2],mce_restart())
612	ACCESSOR(bank3ctl,bank[3],mce_restart())	634	ACCESSOR(bank3ctl,bank[3],mce_restart())
613	ACCESSOR(bank4ctl,bank[4],mce_restart())	635	ACCESSOR(bank4ctl,bank[4],mce_restart())
614	ACCESSOR(bank5ctl,bank[5],mce_restart())	636	ACCESSOR(bank5ctl,bank[5],mce_restart())
615	static struct sysdev_attribute * bank_attributes[NR_BANKS] = {	637
616	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,	638	static ssize_t show_trigger(struct sys_device s, char buf)
617	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};	639	{
		640	strcpy(buf, trigger);
		641	strcat(buf, "\n");
		642	return strlen(trigger) + 1;
		643	}
		644
		645	static ssize_t set_trigger(struct sys_device s,const char buf,size_t siz)
		646	{
		647	char *p;
		648	int len;
		649	strncpy(trigger, buf, sizeof(trigger));
		650	trigger[sizeof(trigger)-1] = 0;
		651	len = strlen(trigger);
		652	p = strchr(trigger, '\n');
		653	if (p) p = 0;
		654	return len;
		655	}
		656
		657	static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
618	ACCESSOR(tolerant,tolerant,)	658	ACCESSOR(tolerant,tolerant,)
619	ACCESSOR(check_interval,check_interval,mce_restart())	659	ACCESSOR(check_interval,check_interval,mce_restart())
		660	static struct sysdev_attribute *mce_attributes[] = {
		661	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
		662	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
		663	&attr_tolerant, &attr_check_interval, &attr_trigger,
		664	NULL
		665	};
620		666
621	/* Per cpu sysdev init. All of the cpus still share the same ctl bank */	667	/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
622	static __cpuinit int mce_create_device(unsigned int cpu)	668	static __cpuinit int mce_create_device(unsigned int cpu)
@@ -632,11 +678,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
632	err = sysdev_register(&per_cpu(device_mce,cpu));	678	err = sysdev_register(&per_cpu(device_mce,cpu));
633		679
634	if (!err) {	680	if (!err) {
635	for (i = 0; i < banks; i++)	681	for (i = 0; mce_attributes[i]; i++)
636	sysdev_create_file(&per_cpu(device_mce,cpu),	682	sysdev_create_file(&per_cpu(device_mce,cpu),
637	bank_attributes[i]);	683	mce_attributes[i]);
638	sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
639	sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
640	}	684	}
641	return err;	685	return err;
642	}	686	}
@@ -645,11 +689,9 @@ static void mce_remove_device(unsigned int cpu)
645	{	689	{
646	int i;	690	int i;
647		691
648	for (i = 0; i < banks; i++)	692	for (i = 0; mce_attributes[i]; i++)
649	sysdev_remove_file(&per_cpu(device_mce,cpu),	693	sysdev_remove_file(&per_cpu(device_mce,cpu),
650	bank_attributes[i]);	694	mce_attributes[i]);
651	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
652	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
653	sysdev_unregister(&per_cpu(device_mce,cpu));	695	sysdev_unregister(&per_cpu(device_mce,cpu));
654	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));	696	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
655	}	697	}


diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index cd8dbe57b33a..d0bd5d66e103 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c
@@ -220,6 +220,10 @@ asmlinkage void mce_threshold_interrupt(void)
220	(high & MASK_LOCKED_HI))	220	(high & MASK_LOCKED_HI))
221	continue;	221	continue;
222		222
		223	/* Log the machine check that caused the threshold
		224	event. */
		225	do_machine_check(NULL, 0);
		226
223	if (high & MASK_OVERFLOW_HI) {	227	if (high & MASK_OVERFLOW_HI) {
224	rdmsrl(address, m.misc);	228	rdmsrl(address, m.misc);
225	rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,	229	rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,


diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 5a11146d6d9c..177e92b4019b 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h
@@ -103,6 +103,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
103		103
104	extern atomic_t mce_entry;	104	extern atomic_t mce_entry;
105		105
		106	extern void do_machine_check(struct pt_regs *, long);
		107
106	#endif	108	#endif
107		109
108	#endif	110	#endif


diff --git a/kernel/kmod.c b/kernel/kmod.c index 3a7379aa31ca..796276141e51 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c
@@ -217,7 +217,10 @@ static int wait_for_helper(void *data)
217	sub_info->retval = ret;	217	sub_info->retval = ret;
218	}	218	}
219		219
220	complete(sub_info->complete);	220	if (sub_info->wait < 0)
		221	kfree(sub_info);
		222	else
		223	complete(sub_info->complete);
221	return 0;	224	return 0;
222	}	225	}
223		226
@@ -239,6 +242,9 @@ static void __call_usermodehelper(struct work_struct *work)
239	pid = kernel_thread(____call_usermodehelper, sub_info,	242	pid = kernel_thread(____call_usermodehelper, sub_info,
240	CLONE_VFORK \| SIGCHLD);	243	CLONE_VFORK \| SIGCHLD);
241		244
		245	if (wait < 0)
		246	return;
		247
242	if (pid < 0) {	248	if (pid < 0) {
243	sub_info->retval = pid;	249	sub_info->retval = pid;
244	complete(sub_info->complete);	250	complete(sub_info->complete);
@@ -253,6 +259,9 @@ static void __call_usermodehelper(struct work_struct *work)
253	* @envp: null-terminated environment list	259	* @envp: null-terminated environment list
254	* @session_keyring: session keyring for process (NULL for an empty keyring)	260	* @session_keyring: session keyring for process (NULL for an empty keyring)
255	* @wait: wait for the application to finish and return status.	261	* @wait: wait for the application to finish and return status.
		262	* when -1 don't wait at all, but you get no useful error back when
		263	* the program couldn't be exec'ed. This makes it safe to call
		264	* from interrupt context.
256	*	265	*
257	* Runs a user-space application. The application is started	266	* Runs a user-space application. The application is started
258	* asynchronously if wait is not set, and runs as a child of keventd.	267	* asynchronously if wait is not set, and runs as a child of keventd.
@@ -265,17 +274,8 @@ int call_usermodehelper_keys(char path, char argv, char *envp,
265	struct key *session_keyring, int wait)	274	struct key *session_keyring, int wait)
266	{	275	{
267	DECLARE_COMPLETION_ONSTACK(done);	276	DECLARE_COMPLETION_ONSTACK(done);
268	struct subprocess_info sub_info = {	277	struct subprocess_info *sub_info;
269	.work = __WORK_INITIALIZER(sub_info.work,	278	int retval;
270	__call_usermodehelper),
271	.complete = &done,
272	.path = path,
273	.argv = argv,
274	.envp = envp,
275	.ring = session_keyring,
276	.wait = wait,
277	.retval = 0,
278	};
279		279
280	if (!khelper_wq)	280	if (!khelper_wq)
281	return -EBUSY;	281	return -EBUSY;
@@ -283,9 +283,25 @@ int call_usermodehelper_keys(char path, char argv, char *envp,
283	if (path[0] == '\0')	283	if (path[0] == '\0')
284	return 0;	284	return 0;
285		285
286	queue_work(khelper_wq, &sub_info.work);	286	sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
		287	if (!sub_info)
		288	return -ENOMEM;
		289
		290	INIT_WORK(&sub_info->work, __call_usermodehelper);
		291	sub_info->complete = &done;
		292	sub_info->path = path;
		293	sub_info->argv = argv;
		294	sub_info->envp = envp;
		295	sub_info->ring = session_keyring;
		296	sub_info->wait = wait;
		297
		298	queue_work(khelper_wq, &sub_info->work);
		299	if (wait < 0) /* task has freed sub_info */
		300	return 0;
287	wait_for_completion(&done);	301	wait_for_completion(&done);
288	return sub_info.retval;	302	retval = sub_info->retval;
		303	kfree(sub_info);
		304	return retval;
289	}	305	}
290	EXPORT_SYMBOL(call_usermodehelper_keys);	306	EXPORT_SYMBOL(call_usermodehelper_keys);
291		307