aboutsummaryrefslogblamecommitdiffstats
path: root/init/main.c
blob: 8b1982082ad8ada65bbd1ff4a1c565c2a77feba6 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11










                                                                              


                          
















                              
                               








                              
                         
                      
                       
                            
                                 
                            



                            
                         
                              
                              
                          
                                
                         
                          
                        
                         



                      
                         
                           
 




                            

                                                                               
   
 



                                                                                                   
                               

                           


                                     




                                  




                                            


                                           




                          





                                           













                                                                    





                                                         

                             
                                     
 
                 
                                                         
                                                 

  








                                                                    



                                                 

                                  
                           
                               
                 

 
                            


                                    

                                          


                                       

 
                                
     
                              












                                                                           
 







                                              








                                                                      
                                





                                                



                                                                          
                                                                      
                                                            








                                                                               

                               











                                                                            
                              
                 



                                         
                             
                 

 

                                   






                                            
                                  
























                                                                            
                                                                       



































                                                                                



                                              
















                                                                      











                                              















                                                             
                                      
                                                      






                                            
                                                             

                                                                 

                                                           
 
                                  

                                                                              
                            

         
                                           



                                                    
                         

                                                          
                                   
                                                        
                              

                                     



                                                                           
                                      




      













                                                                         







                                                                        
                                                 

                               

                
                                                                   
                              

                                                                    
                        


                                                       
                                              
           
                                       
                                    
                   
                          
 
                                                      
                   
 






                                                        



                                                               


















                                                                               
                                                                   







                                                                          








                                                                           



                                                              



                                                                       


                                 



                                                              
                      
                       
                            


                              
                                    
 




                                                           
                    
                        

                            
                             
                                  
                                         
                       
                              
                                                                  













                                                                          
                                                                           
                            
                                                                          

                                                    




                                                                           





                            
                        
                       
                           
                    


                                                                               
                             
                           








                                                                           


                       






                                                                   








                                                                              
                            
                   
                                 
                           
                          
                                

























                                                         
                      
                      
                               
                         


                     
                                                                 














                                                          







                                                                      
                                      


                                 

                                     


                                                               
                                     
                                         

                 
                                   
 














                                                                             
                                                                    


                                                                 








                                                     



                                                                       



















                                                                            
                        


                       








                                               
                                             

                                         

                         
                          

                                        




                                                 
                                                           

 









































                                                                                     
                                            













                                                         
                                           
 

                                    
                                         


                               




                          





                                                                            





                                                                                
                                    
         





                                                           

                    
 
/*
 *  linux/init/main.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  GK 2/5/95  -  Changed to support mounting root fs via NFS
 *  Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
 *  Simplified starting of init:  Michael A. Griffith <grif@acm.org> 
 */

#include <linux/types.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/utsname.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/initrd.h>
#include <linux/hdreg.h>
#include <linux/bootmem.h>
#include <linux/tty.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
#include <linux/kernel_stat.h>
#include <linux/start_kernel.h>
#include <linux/security.h>
#include <linux/workqueue.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
#include <linux/kallsyms.h>
#include <linux/writeback.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/cgroup.h>
#include <linux/efi.h>
#include <linux/tick.h>
#include <linux/interrupt.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
#include <linux/unistd.h>
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
#include <linux/unwind.h>
#include <linux/buffer_head.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/pid_namespace.h>
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>

#include <asm/io.h>
#include <asm/bugs.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
#endif

/*
 * This is one of the first .c files built. Error out early if we have compiler
 * trouble.
 */

#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
#warning gcc-4.1.0 is known to miscompile the kernel.  A different compiler version is recommended.
#endif

/* Body of the init thread; defined near the end of this file and started by rest_init(). */
static int kernel_init(void *);

/*
 * Initialisation routines implemented outside this file.
 * NOTE(review): mca_init() and sbus_init() are declared here but no call
 * to them is visible in this file.
 */
extern void init_IRQ(void);
extern void fork_init(unsigned long);
extern void mca_init(void);
extern void sbus_init(void);
extern void pidhash_init(void);
extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void radix_tree_init(void);
extern void free_initmem(void);
/* Without CONFIG_ACPI, acpi_early_init() is an empty inline stub. */
#ifdef	CONFIG_ACPI
extern void acpi_early_init(void);
#else
static inline void acpi_early_init(void) { }
#endif
/* Without CONFIG_DEBUG_RODATA, mark_rodata_ro() is an empty inline stub. */
#ifndef CONFIG_DEBUG_RODATA
static inline void mark_rodata_ro(void) { }
#endif

/* NOTE(review): tc_init() is declared but no call to it is visible in this file. */
#ifdef CONFIG_TC
extern void tc_init(void);
#endif

/*
 * populate_rootfs() is only a real call with CONFIG_ACPI_CUSTOM_DSDT_INITRD;
 * otherwise it is an empty stub.  The two variants differ in return type
 * (int vs. void); the caller in start_kernel() ignores any result.
 */
#ifdef CONFIG_ACPI_CUSTOM_DSDT_INITRD
extern int populate_rootfs(void);
#else
static inline void populate_rootfs(void) {}
#endif

/* Global boot/run state; init_post() sets it to SYSTEM_RUNNING. */
enum system_states system_state;
EXPORT_SYMBOL(system_state);

/*
 * Boot command-line arguments: both the argument and the environment
 * arrays handed to init (argv_init/envp_init) are sized from the same
 * configuration limit.
 */
#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT

extern void time_init(void);
/*
 * Default late time init is NULL. archs can override this later.
 * start_kernel() calls it, if set, just before calibrate_delay().
 */
void (*late_time_init)(void);
extern void softirq_init(void);

/* Untouched command line saved by arch-specific code. */
char __initdata boot_command_line[COMMAND_LINE_SIZE];
/* Untouched saved command line (eg. for /proc); allocated in setup_command_line(). */
char *saved_command_line;
/* Command line for parameter parsing; this copy is what parse_args() is run on in start_kernel(). */
static char *static_command_line;

/* Program named by "init=" (see init_setup()); NULL if the option was not given. */
static char *execute_command;
/* Program named by "rdinit=" (see rdinit_setup()); kernel_init() falls back to "/init". */
static char *ramdisk_execute_command;

#ifdef CONFIG_SMP
/* Setup configured maximum number of CPUs to activate */
unsigned int __initdata setup_max_cpus = NR_CPUS;

/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */
#ifndef CONFIG_X86_IO_APIC
/*
 * No IO-APIC support configured: provide an empty stub so nosmp() and
 * maxcpus() can call it unconditionally.  (No ';' after the body: a
 * stray semicolon there is an empty file-scope declaration.)
 */
static inline void disable_ioapic_setup(void) { }
#endif

/*
 * "nosmp" early parameter: limit the number of CPUs to activate to zero
 * and turn off IO-APIC setup.  The option carries no value, so @str is
 * not looked at.  Always returns 0.
 */
static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	disable_ioapic_setup();

	return 0;
}

early_param("nosmp", nosmp);

/*
 * "maxcpus=<NUM>" early parameter: store <NUM> in setup_max_cpus.  A
 * resulting value of zero also turns off IO-APIC setup, like "nosmp".
 * Always returns 0.
 */
static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);

	if (!setup_max_cpus)
		disable_ioapic_setup();

	return 0;
}

early_param("maxcpus", maxcpus);
#else
#define setup_max_cpus NR_CPUS
#endif

/*
 * If set, this tells drivers to reset the underlying device before going
 * ahead with initialization; otherwise a driver might rely on the BIOS
 * and skip the reset operation.
 *
 * This is useful if the kernel is booting in an unreliable environment,
 * e.g. a kdump situation where the previous kernel has crashed, the BIOS
 * has been skipped and devices will be in an unknown state.
 */
unsigned int reset_devices;
EXPORT_SYMBOL(reset_devices);

/*
 * "reset_devices" boot option: raise the global reset_devices flag.
 * @str is unused.  Returns 1 to report the option as handled.
 */
static int __init set_reset_devices(char *str)
{
	reset_devices = 1;

	return 1;
}

__setup("reset_devices", set_reset_devices);

/*
 * Argument vector handed to the init process.  Slot 0 is overwritten with
 * the executed path by run_init_process(); unknown_bootoption() appends
 * valueless boot options after it.
 */
static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
/* Environment for init; unknown "name=value" boot options are added or replaced here. */
char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
/*
 * Deferred panic: a format string and its argument, recorded by
 * unknown_bootoption() and acted on by start_kernel() after console_init().
 */
static const char *panic_later, *panic_param;

/* Bounds of the table of __setup()/early_param() entries (presumably laid out by the linker script - confirm). */
extern struct obs_kernel_param __setup_start[], __setup_end[];

/*
 * Offer @line (a complete "param" or "param=value" string) to the
 * old-style __setup() handlers.
 *
 * Every table entry whose name is a prefix of @line is considered:
 *  - early entries are not run again here; an exact name match (the name
 *    is followed by '\0' or '=') is only remembered;
 *  - an entry without a handler is reported as obsolete and consumes
 *    the option;
 *  - otherwise the handler is called with the text following the name,
 *    and a non-zero return consumes the option.
 *
 * Returns 1 if the option was consumed, otherwise whether a matching
 * early parameter was seen (1) or not (0).
 */
static int __init obsolete_checksetup(char *line)
{
	struct obs_kernel_param *p;
	int had_early_param = 0;

	/*
	 * Test the bound before touching an entry (as do_early_param()
	 * does), so an empty table is never dereferenced.
	 */
	for (p = __setup_start; p < __setup_end; p++) {
		int n = strlen(p->str);

		if (strncmp(line, p->str, n))
			continue;

		if (p->early) {
			/* Already done in parse_early_param?
			 * (Needs exact match on param part).
			 * Keep iterating, as we can have early
			 * params and __setups of same names 8( */
			if (line[n] == '\0' || line[n] == '=')
				had_early_param = 1;
		} else if (!p->setup_func) {
			printk(KERN_WARNING "Parameter %s is obsolete,"
			       " ignored\n", p->str);
			return 1;
		} else if (p->setup_func(line + n))
			return 1;
	}

	return had_early_param;
}

/*
 * Initial value for the delay-loop count per jiffy.  This should be
 * approx 2 BogoMips to start (note initial shift), and will still work
 * even if initially too large; it will just take slightly longer.
 * start_kernel() calls calibrate_delay(), which presumably replaces
 * this starting value - confirm.
 */
unsigned long loops_per_jiffy = (1<<12);

EXPORT_SYMBOL(loops_per_jiffy);

/* "debug" early parameter: set console_loglevel to 10.  @str is unused; returns 0. */
static int __init debug_kernel(char *str)
{
	console_loglevel = 10;

	return 0;
}

/* "quiet" early parameter: set console_loglevel to 4.  @str is unused; returns 0. */
static int __init quiet_kernel(char *str)
{
	console_loglevel = 4;

	return 0;
}

/* Register "debug" and "quiet" as early parameters, handled by debug_kernel() and quiet_kernel(). */
early_param("debug", debug_kernel);
early_param("quiet", quiet_kernel);

/*
 * "loglevel=<n>" early parameter: set console_loglevel to <n>.
 *
 * Returns 0 on success.  do_early_param() treats any non-zero return as
 * "Malformed early option", so success must not return 1.  If no number
 * can be parsed, console_loglevel is left untouched and -EINVAL is
 * returned so that the warning is printed for genuinely bad input.
 */
static int __init loglevel(char *str)
{
	int newlevel;

	/*
	 * Parse into a temporary so console_loglevel only changes when
	 * get_option() reports that it found an integer.
	 */
	if (get_option(&str, &newlevel)) {
		console_loglevel = newlevel;
		return 0;
	}

	return -EINVAL;
}

early_param("loglevel", loglevel);

/*
 * Unknown boot options get handed to init, unless they look like
 * failed parameters.
 *
 * Passed to parse_args() in start_kernel() as the handler for options it
 * could not match.  @param is the option name; @val is its value or NULL
 * if the option had none.  Options with a value become environment
 * entries (envp_init), options without become arguments (argv_init).
 * Always returns 0.
 */
static int __init unknown_bootoption(char *param, char *val)
{
	/* Change NUL term back to "=", to make "param" the whole string. */
	if (val) {
		/* param=val or param="val"? */
		if (val == param+strlen(param)+1)
			val[-1] = '=';
		else if (val == param+strlen(param)+2) {
			/*
			 * Value starts two bytes past the name: restore the
			 * '=' and slide the value (including its NUL) one
			 * byte left so it directly follows the '='.
			 */
			val[-2] = '=';
			memmove(val-1, val, strlen(val)+1);
			val--;
		} else
			BUG();
	}

	/* Handle obsolete-style parameters */
	if (obsolete_checksetup(param))
		return 0;

	/*
	 * Preemptive maintenance for "why didn't my misspelled command
	 * line work?"
	 *
	 * A '.' in the name part (before the value, if any) is reported
	 * and the option is dropped rather than passed on to init.
	 */
	if (strchr(param, '.') && (!val || strchr(param, '.') < val)) {
		printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param);
		return 0;
	}

	/* Once a deferred panic has been recorded, stop collecting options. */
	if (panic_later)
		return 0;

	if (val) {
		/* Environment option */
		unsigned int i;
		/*
		 * Find either an existing entry with the same "name=" prefix
		 * (val - param covers the name and the '=') or the first
		 * free slot.
		 * NOTE(review): reaching index MAX_INIT_ENVS records the
		 * deferred panic but does not leave the loop.
		 */
		for (i = 0; envp_init[i]; i++) {
			if (i == MAX_INIT_ENVS) {
				panic_later = "Too many boot env vars at `%s'";
				panic_param = param;
			}
			if (!strncmp(param, envp_init[i], val - param))
				break;
		}
		envp_init[i] = param;
	} else {
		/* Command line option */
		unsigned int i;
		/*
		 * Walk to the first free slot.
		 * NOTE(review): as above, hitting MAX_INIT_ARGS records the
		 * deferred panic and the loop keeps going.
		 */
		for (i = 0; argv_init[i]; i++) {
			if (i == MAX_INIT_ARGS) {
				panic_later = "Too many boot init vars at `%s'";
				panic_param = param;
			}
		}
		argv_init[i] = param;
	}
	return 0;
}

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Flag defined only with CONFIG_DEBUG_PAGEALLOC; starts out 0.
 * NOTE(review): presumably set by enable_debug_pagealloc(), which
 * start_kernel() calls after mem_init() - confirm.
 */
int __read_mostly debug_pagealloc_enabled = 0;
#endif

/*
 * "init=<path>" boot option: remember <path> as the program to run as
 * init.  Returns 1 to report the option as handled.
 */
static int __init init_setup(char *str)
{
	unsigned int slot = 1;

	execute_command = str;

	/*
	 * In case LILO is going to boot us with default command line,
	 * it prepends "auto" before the whole cmdline which makes
	 * the shell think it should execute a script with such name.
	 * So we ignore all arguments entered _before_ init=... [MJ]
	 */
	while (slot < MAX_INIT_ARGS)
		argv_init[slot++] = NULL;

	return 1;
}
__setup("init=", init_setup);

/*
 * "rdinit=<path>" boot option: remember <path> as the program to run
 * from the early userspace.  Returns 1 to report the option as handled.
 */
static int __init rdinit_setup(char *str)
{
	unsigned int slot = 1;

	ramdisk_execute_command = str;

	/* See "auto" comment in init_setup */
	while (slot < MAX_INIT_ARGS)
		argv_init[slot++] = NULL;

	return 1;
}
__setup("rdinit=", rdinit_setup);

#ifndef CONFIG_SMP

/*
 * Uniprocessor build (!CONFIG_SMP): smp_init() only has work to do when
 * a local APIC is configured; otherwise it expands to an empty statement.
 */
#ifdef CONFIG_X86_LOCAL_APIC
static void __init smp_init(void)
{
	APIC_init_uniprocessor();
}
#else
#define smp_init()	do { } while (0)
#endif

/* Empty uniprocessor stand-ins for the per-cpu area and CPU preparation hooks. */
static inline void setup_per_cpu_areas(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }

#else

#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
/* Per-CPU offset table: entry i is (CPU i's private copy) - __per_cpu_start. */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;

EXPORT_SYMBOL(__per_cpu_offset);

/*
 * Generic per-cpu area setup: allocate one page-aligned area per possible
 * CPU from bootmem and initialise each from the
 * [__per_cpu_start, __per_cpu_end) template, recording where each copy
 * lives in __per_cpu_offset[].
 */
static void __init setup_per_cpu_areas(void)
{
	unsigned long nr_possible_cpus = num_possible_cpus();
	unsigned long area_size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
	unsigned long cpu;
	char *area;

	/* Copy section for each CPU (we discard the original) */
	area = alloc_bootmem_pages(area_size * nr_possible_cpus);

	for_each_possible_cpu(cpu) {
		__per_cpu_offset[cpu] = area - __per_cpu_start;
		memcpy(area, __per_cpu_start, __per_cpu_end - __per_cpu_start);
		area += area_size;
	}
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

/*
 * Called by the boot processor to bring up the remaining present CPUs,
 * stopping once setup_max_cpus are online.
 */
static void __init smp_init(void)
{
	unsigned int cpu;

	/* FIXME: This should be done in userspace --RR */
	for_each_present_cpu(cpu) {
		if (num_online_cpus() >= setup_max_cpus)
			break;
		if (cpu_online(cpu))
			continue;
		cpu_up(cpu);
	}

	/* Any cleanup work */
	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
	smp_cpus_done(setup_max_cpus);
}

#endif

/*
 * Keep two bootmem copies of the command line:
 *  - saved_command_line: the untouched boot_command_line, for future
 *    reference;
 *  - static_command_line: a copy of @command_line that is parsed in
 *    place, so that components may keep pointers to names/values in it.
 */
static void __init setup_command_line(char *command_line)
{
	char *pristine = alloc_bootmem(strlen(boot_command_line) + 1);
	char *parse_copy = alloc_bootmem(strlen(command_line) + 1);

	strcpy(pristine, boot_command_line);
	strcpy(parse_copy, command_line);

	saved_command_line = pristine;
	static_command_line = parse_copy;
}

/*
 * We need to finalize in a non-__init function or else race conditions
 * between the root thread and the init thread may cause start_kernel to
 * be reaped by free_initmem before the root thread has proceeded to
 * cpu_idle.
 *
 * gcc-3.4 accidentally inlines this function, so use noinline.
 *
 * Called last from start_kernel(): spawns the init and kthreadd threads,
 * drops the kernel lock taken in start_kernel() and turns the calling
 * context into the idle loop.
 */

static void noinline __init_refok rest_init(void)
	__releases(kernel_lock)
{
	int pid;

	/*
	 * Create the init thread before kthreadd.
	 * NOTE(review): presumably so that init receives the first pid - confirm.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
	numa_default_policy();
	/* Create kthreadd and remember its task for later kernel-thread creation. */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	kthreadd_task = find_task_by_pid(pid);
	unlock_kernel();

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	preempt_enable_no_resched();
	schedule();
	preempt_disable();

	/* Call into cpu_idle with preempt disabled */
	cpu_idle();
}

/*
 * parse_args() callback used by parse_early_param(): run the handler of
 * every early entry named exactly @param.  In addition, "console" is
 * offered to the entry named "earlycon".  A handler returning non-zero
 * gets a warning; the option itself is always accepted (returns 0).
 */
static int __init do_early_param(char *param, char *val)
{
	struct obs_kernel_param *p;

	for (p = __setup_start; p < __setup_end; p++) {
		int wanted = p->early && strcmp(param, p->str) == 0;

		if (!wanted)
			wanted = strcmp(param, "console") == 0 &&
				 strcmp(p->str, "earlycon") == 0;
		if (!wanted)
			continue;

		if (p->setup_func(val) != 0)
			printk(KERN_WARNING
			       "Malformed early option '%s'\n", param);
	}

	/* We accept everything at this stage. */
	return 0;
}

/*
 * Arch code calls this early on, or if not, just before other parsing.
 * Only the first call does any work: early options are parsed from a
 * scratch copy of boot_command_line, leaving the original untouched.
 */
void __init parse_early_param(void)
{
	static __initdata int done = 0;
	static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];

	if (!done) {
		/* All fall through to do_early_param. */
		strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
		parse_args("early options", tmp_cmdline, NULL, 0,
			   do_early_param);
		done = 1;
	}
}

/*
 *	Activate the first processor: record the CPU we are running on in
 *	the online, present and possible maps (SMP and UP alike).
 */

static void __init boot_cpu_init(void)
{
	int this_cpu = smp_processor_id();

	cpu_set(this_cpu, cpu_online_map);
	cpu_set(this_cpu, cpu_present_map);
	cpu_set(this_cpu, cpu_possible_map);
}

/*
 * Default do-nothing implementation; being weak, it gives way to any
 * other definition of smp_setup_processor_id() at link time.
 */
void __init __attribute__((weak)) smp_setup_processor_id(void)
{
	/* nothing to do by default */
}

/*
 * Main boot-time initialisation routine (presumably entered from the
 * architecture's boot code - confirm).
 *
 * Runs with interrupts disabled and the kernel lock held through the
 * early setup, parses the command line, enables interrupts, brings up
 * the console and the core subsystems in order, and finally hands over
 * to rest_init(), which does not return here.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	extern struct kernel_param __start___param[], __stop___param[];

	smp_setup_processor_id();

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	unwind_init();
	lockdep_init();
	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_off();
	early_init_irq_lock_class();

/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
	lock_kernel();
	tick_init();
	boot_cpu_init();
	page_address_init();
	/*
	 * Print the banner as data through "%s" rather than using it as
	 * the format string, so a '%' in the banner text can never be
	 * interpreted as a conversion.
	 */
	printk(KERN_NOTICE "%s", linux_banner);
	setup_arch(&command_line);
	setup_command_line(command_line);
	unwind_setup();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	build_all_zonelists();
	page_alloc_init();
	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	/* Options nobody claims end up in unknown_bootoption(). */
	parse_args("Booting kernel", static_command_line, __start___param,
		   __stop___param - __start___param,
		   &unknown_bootoption);
	if (!irqs_disabled()) {
		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
				"enabled *very* early, fixing it\n");
		local_irq_disable();
	}
	sort_main_extable();
	trap_init();
	rcu_init();
	init_IRQ();
	pidhash_init();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	profile_init();
	if (!irqs_disabled())
		printk("start_kernel(): bug: interrupts were enabled early\n");
	early_boot_irqs_on();
	local_irq_enable();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	/* A panic recorded during option parsing is raised now that the console is up. */
	if (panic_later)
		panic(panic_later, panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start && !initrd_below_start_ok &&
			initrd_start < min_low_pfn << PAGE_SHIFT) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
		initrd_start = 0;
	}
#endif
	vfs_caches_init_early();
	cpuset_init_early();
	mem_init();
	enable_debug_pagealloc();
	cpu_hotplug_init();
	kmem_cache_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	calibrate_delay();
	pidmap_init();
	pgtable_cache_init();
	prio_tree_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled)
		efi_enter_virtual_mode();
#endif
	fork_init(num_physpages);
	proc_caches_init();
	buffer_init();
	unnamed_dev_init();
	key_init();
	security_init();
	vfs_caches_init(num_physpages);
	radix_tree_init();
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
#ifdef CONFIG_PROC_FS
	proc_root_init();
#endif
	cgroup_init();
	cpuset_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	populate_rootfs(); /* For DSDT override from initramfs */
	acpi_early_init(); /* before LAPIC and SMP init */

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}

/* Non-zero once "initcall_debug" was given: do_initcalls() then traces every initcall. */
static int __initdata initcall_debug;

/* "initcall_debug" boot option: switch initcall tracing on.  @str is unused; returns 1 (handled). */
static int __init initcall_debug_setup(char *str)
{
	initcall_debug = 1;

	return 1;
}
__setup("initcall_debug", initcall_debug_setup);

/* Bounds of the table of initcall function pointers (presumably laid out by the linker script - confirm). */
extern initcall_t __initcall_start[], __initcall_end[];

/*
 * Run every initcall in table order.
 *
 * With "initcall_debug" each call is announced, its return value printed
 * and its run time reported.  After each call the preemption count and
 * interrupt state are checked; if the initcall left them changed they
 * are restored and a warning naming the initcall is printed.
 */
static void __init do_initcalls(void)
{
	initcall_t *call;
	/* Preemption count on entry; every initcall must return with the same value. */
	int count = preempt_count();

	for (call = __initcall_start; call < __initcall_end; call++) {
		ktime_t t0, t1, delta;
		char *msg = NULL;
		char msgbuf[40];
		int result;

		if (initcall_debug) {
			printk("Calling initcall 0x%p", *call);
			print_fn_descriptor_symbol(": %s()",
					(unsigned long) *call);
			printk("\n");
			/* t0 is only set, and later only read, when initcall_debug is on. */
			t0 = ktime_get();
		}

		result = (*call)();

		if (initcall_debug) {
			t1 = ktime_get();
			delta = ktime_sub(t1, t0);

			printk("initcall 0x%p", *call);
			print_fn_descriptor_symbol(": %s()",
					(unsigned long) *call);
			printk(" returned %d.\n", result);

			/*
			 * ">> 20" divides by 2^20; assuming tv64 counts
			 * nanoseconds (TODO confirm) this only approximates
			 * milliseconds.
			 * NOTE(review): "%Ld" is a signed conversion but the
			 * argument is cast to unsigned long long.
			 */
			printk("initcall 0x%p ran for %Ld msecs: ",
				*call, (unsigned long long)delta.tv64 >> 20);
			print_fn_descriptor_symbol("%s()\n",
				(unsigned long) *call);
		}

		/*
		 * Only one message is reported per initcall; the checks
		 * below overwrite msg, so the last one that fires wins.
		 * Error codes are reported only with initcall_debug, and
		 * -ENODEV is not treated as an error.
		 */
		if (result && result != -ENODEV && initcall_debug) {
			sprintf(msgbuf, "error code %d", result);
			msg = msgbuf;
		}
		if (preempt_count() != count) {
			msg = "preemption imbalance";
			/* Put the count back to its value on entry. */
			preempt_count() = count;
		}
		if (irqs_disabled()) {
			msg = "disabled interrupts";
			local_irq_enable();
		}
		if (msg) {
			printk(KERN_WARNING "initcall at 0x%p", *call);
			print_fn_descriptor_symbol(": %s()",
					(unsigned long) *call);
			printk(": returned with %s\n", msg);
		}
	}

	/* Make sure there is no pending stuff from the initcall sequence */
	flush_scheduled_work();
}

/*
 * Ok, the machine is now initialized. None of the devices
 * have been touched yet, but the CPU subsystem is up and
 * running, and memory and process management works.
 *
 * Now we can finally start doing some real work: set up workqueues,
 * the usermode helper, the driver core and the IRQ proc entries, then
 * run all initcalls.
 */
static void __init do_basic_setup(void)
{
	/* drivers will send hotplug events */
	init_workqueues();
	usermodehelper_init();

	driver_init();
	init_irq_proc();

	do_initcalls();
}

/* Non-zero once "nosoftlockup" was given: do_pre_smp_initcalls() then skips spawn_softlockup_task(). */
static int __initdata nosoftlockup;

/* "nosoftlockup" boot option: set the flag above.  @str is unused; returns 1 (handled). */
static int __init nosoftlockup_setup(char *str)
{
	nosoftlockup = 1;

	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

/*
 * Setup that kernel_init() runs before smp_init(): migration
 * initialisation, ksoftirqd and, unless "nosoftlockup" was given, the
 * softlockup task.
 */
static void __init do_pre_smp_initcalls(void)
{
	extern int spawn_ksoftirqd(void);

	migration_init();
	spawn_ksoftirqd();

	if (nosoftlockup)
		return;

	spawn_softlockup_task();
}

/*
 * Try to exec @init_filename as the init process, with argv_init (slot 0
 * replaced by the filename) and envp_init.  The result of
 * kernel_execve() is not examined here.
 */
static void run_init_process(char *init_filename)
{
	char **argv = argv_init;

	argv[0] = init_filename;
	kernel_execve(init_filename, argv, envp_init);
}

/*
 * Final stage of the init thread.  This must not be an __init function
 * because it frees the init memory itself; noinline keeps gcc from
 * folding it into its __init caller (and thus into init.text).
 *
 * Frees init memory, drops the kernel lock, marks the system running,
 * opens /dev/console as descriptors 0, 1 and 2 and then tries the init
 * candidates in turn.  Panics if none of them could be started.
 */
static int noinline init_post(void)
{
	/* Built-in candidates, tried in this order after rdinit=/init=. */
	static char * const default_inits[] = {
		"/sbin/init",
		"/etc/init",
		"/bin/init",
		"/bin/sh",
	};
	unsigned int idx;

	free_initmem();
	unlock_kernel();
	mark_rodata_ro();
	system_state = SYSTEM_RUNNING;
	numa_default_policy();

	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		printk(KERN_WARNING "Warning: unable to open an initial console.\n");

	/* Duplicate descriptor 0 twice for descriptors 1 and 2. */
	(void) sys_dup(0);
	(void) sys_dup(0);

	if (ramdisk_execute_command) {
		run_init_process(ramdisk_execute_command);
		printk(KERN_WARNING "Failed to execute %s\n",
				ramdisk_execute_command);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	if (execute_command) {
		run_init_process(execute_command);
		printk(KERN_WARNING "Failed to execute %s.  Attempting "
					"defaults...\n", execute_command);
	}

	for (idx = 0; idx < sizeof(default_inits) / sizeof(default_inits[0]); idx++)
		run_init_process(default_inits[idx]);

	panic("No init found.  Try passing init= option to kernel.");
}

/*
 * Body of the init thread created by rest_init().
 *
 * Finishes kernel initialisation (SMP bring-up, driver and initcall
 * setup), decides whether an early-userspace init exists, and then calls
 * init_post() to start the user-mode init process.  @unused is ignored.
 */
static int __init kernel_init(void * unused)
{
	/* Taken here; released by unlock_kernel() in init_post(). */
	lock_kernel();
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed(current, CPU_MASK_ALL);
	/*
	 * Tell the world that we're going to be the grim
	 * reaper of innocent orphaned children.
	 *
	 * We don't want people to have to make incorrect
	 * assumptions about where in the task array this
	 * can be found.
	 */
	init_pid_ns.child_reaper = current;

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();

	smp_init();
	sched_init_smp();

	cpuset_init_smp();

	do_basic_setup();

	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	/* No "rdinit=" given: fall back to "/init". */
	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/*
	 * If the early init cannot be accessed (mode 0 - presumably a plain
	 * existence check, confirm), forget it and prepare the namespace
	 * via prepare_namespace() instead.
	 */
	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */
	init_post();
	return 0;
}
/span>attr.ist1, i); } snprintf(port->int_aff_name, EHEA_IRQ_NAME_SIZE - 1, "%s-aff", dev->name); ret = ibmebus_request_irq(NULL, port->qp_eq->attr.ist1, ehea_qp_aff_irq_handler, SA_INTERRUPT, port->int_aff_name, port); if (ret) { ehea_error("failed registering irq for qp_aff_irq_handler:" "ist=%X", port->qp_eq->attr.ist1); goto out_free_qpeq; } if (netif_msg_ifup(port)) ehea_info("irq_handle 0x%X for function qp_aff_irq_handler " "registered", port->qp_eq->attr.ist1); for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { pr = &port->port_res[i]; snprintf(pr->int_send_name, EHEA_IRQ_NAME_SIZE - 1, "%s-send%d", dev->name, i); ret = ibmebus_request_irq(NULL, pr->send_eq->attr.ist1, ehea_send_irq_handler, SA_INTERRUPT, pr->int_send_name, pr); if (ret) { ehea_error("failed registering irq for ehea_send " "port_res_nr:%d, ist=%X", i, pr->send_eq->attr.ist1); goto out_free_req; } if (netif_msg_ifup(port)) ehea_info("irq_handle 0x%X for function ehea_send_int " "%d registered", pr->send_eq->attr.ist1, i); } out: return ret; out_free_req: while (--i >= 0) { u32 ist = port->port_res[i].send_eq->attr.ist1; ibmebus_free_irq(NULL, ist, &port->port_res[i]); } out_free_qpeq: ibmebus_free_irq(NULL, port->qp_eq->attr.ist1, port); i = port->num_def_qps; out_free_seq: while (--i >= 0) { u32 ist = port->port_res[i].recv_eq->attr.ist1; ibmebus_free_irq(NULL, ist, &port->port_res[i]); } goto out; } static void ehea_free_interrupts(struct net_device *dev) { struct ehea_port *port = netdev_priv(dev); struct ehea_port_res *pr; int i; /* send */ for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { pr = &port->port_res[i]; ibmebus_free_irq(NULL, pr->send_eq->attr.ist1, pr); if (netif_msg_intr(port)) ehea_info("free send irq for res %d with handle 0x%X", i, pr->send_eq->attr.ist1); } /* receive */ for (i = 0; i < port->num_def_qps; i++) { pr = &port->port_res[i]; ibmebus_free_irq(NULL, pr->recv_eq->attr.ist1, pr); if (netif_msg_intr(port)) ehea_info("free recv irq for 
res %d with handle 0x%X", i, pr->recv_eq->attr.ist1);
	}

	/* associated events */
	ibmebus_free_irq(NULL, port->qp_eq->attr.ist1, port);
	if (netif_msg_intr(port))
		ehea_info("associated event interrupt for handle 0x%X freed",
			  port->qp_eq->attr.ist1);
}

/*
 * ehea_configure_port - write the port's CB0 control block.
 *
 * Fills a zeroed, page-sized CB0 buffer with the receive-control bits and
 * the QP numbers of the first num_def_qps port resources, then hands it to
 * ehea_h_modify_ehea_port() with a mask selecting only those two fields.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or -EIO
 * if the hypervisor call does not return H_SUCCESS.
 */
static int ehea_configure_port(struct ehea_port *port)
{
	int ret, i;
	u64 hret, mask;
	struct hcp_ehea_port_cb0 *cb0;

	ret = -ENOMEM;
	cb0 = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!cb0)
		goto out;

	cb0->port_rc = EHEA_BMASK_SET(PXLY_RC_VALID, 1)
		     | EHEA_BMASK_SET(PXLY_RC_IP_CHKSUM, 1)
		     | EHEA_BMASK_SET(PXLY_RC_TCP_UDP_CHKSUM, 1)
		     | EHEA_BMASK_SET(PXLY_RC_VLAN_XTRACT, 1)
		     | EHEA_BMASK_SET(PXLY_RC_VLAN_TAG_FILTER,
				      PXLY_RC_VLAN_FILTER)
		     | EHEA_BMASK_SET(PXLY_RC_JUMBO_FRAME, 1);

	/* one default QP number per default port resource */
	for (i = 0; i < port->num_def_qps; i++)
		cb0->default_qpn_arr[i] = port->port_res[i].qp->init_attr.qp_nr;

	if (netif_msg_ifup(port))
		ehea_dump(cb0, sizeof(*cb0), "ehea_configure_port");

	/* only the two fields set above are flagged for modification */
	mask = EHEA_BMASK_SET(H_PORT_CB0_PRC, 1)
	     | EHEA_BMASK_SET(H_PORT_CB0_DEFQPNARRAY, 1);

	hret = ehea_h_modify_ehea_port(port->adapter->handle,
				       port->logical_port_id,
				       H_PORT_CB0, mask, cb0);
	ret = -EIO;
	if (hret != H_SUCCESS)
		goto out_free;

	ret = 0;

out_free:
	kfree(cb0);
out:
	return ret;
}

/*
 * ehea_gen_smrs - register the send and receive SMRs of a port resource.
 *
 * Both registrations use the adapter's MR handle and vaddr. If the second
 * registration fails, the first (send) SMR is released again.
 *
 * Returns 0 on success, -EIO on any failure.
 */
static int ehea_gen_smrs(struct ehea_port_res *pr)
{
	u64 hret;
	struct ehea_adapter *adapter = pr->port->adapter;

	hret = ehea_h_register_smr(adapter->handle, adapter->mr.handle,
				   adapter->mr.vaddr, EHEA_MR_ACC_CTRL,
				   adapter->pd, &pr->send_mr);
	if (hret != H_SUCCESS)
		goto out;

	hret = ehea_h_register_smr(adapter->handle, adapter->mr.handle,
				   adapter->mr.vaddr, EHEA_MR_ACC_CTRL,
				   adapter->pd, &pr->recv_mr);
	if (hret != H_SUCCESS)
		goto out_freeres;

	return 0;

out_freeres:
	/* undo the send SMR registration; a failure here is only logged */
	hret = ehea_h_free_resource(adapter->handle, pr->send_mr.handle);
	if (hret != H_SUCCESS)
		ehea_error("failed freeing SMR");
out:
	return -EIO;
}

/*
 * ehea_rem_smrs - free the send and receive SMRs of a port resource.
 */
static int ehea_rem_smrs(struct ehea_port_res *pr)
{
	struct ehea_adapter *adapter = pr->port->adapter;
	int ret = 0;
	u64 hret;

	hret = ehea_h_free_resource(adapter->handle, pr->send_mr.handle);
	if (hret
!= H_SUCCESS) {
		ret = -EIO;
		ehea_error("failed freeing send SMR for pr=%p", pr);
	}

	/* the receive SMR is freed even if freeing the send SMR failed */
	hret = ehea_h_free_resource(adapter->handle, pr->recv_mr.handle);
	if (hret != H_SUCCESS) {
		ret = -EIO;
		ehea_error("failed freeing recv SMR for pr=%p", pr);
	}

	return ret;
}

/*
 * ehea_init_q_skba - allocate and reset an skb pointer array.
 *
 * vmalloc()s max_q_entries pointer-sized slots, zeroes them and resets the
 * len/index/os_skbs bookkeeping fields.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int ehea_init_q_skba(struct ehea_q_skb_arr *q_skba, int max_q_entries)
{
	int arr_size = sizeof(void*) * max_q_entries;

	q_skba->arr = vmalloc(arr_size);
	if (!q_skba->arr)
		return -ENOMEM;

	memset(q_skba->arr, 0, arr_size);

	q_skba->len = max_q_entries;
	q_skba->index = 0;
	q_skba->os_skbs = 0;

	return 0;
}

/*
 * ehea_init_port_res - build one port resource.
 *
 * Zeroes *pr, then creates in order: receive and send EQ, receive and send
 * CQ, the QP (sized from pr_cfg, tagged with queue_token), the four skb
 * pointer arrays, the SMRs, and the send-completion tasklet.
 *
 * On any failure everything created so far is released through the
 * out_free path. Returns 0 on success or a negative errno.
 */
static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
			      struct port_res_cfg *pr_cfg, int queue_token)
{
	struct ehea_adapter *adapter = port->adapter;
	enum ehea_eq_type eq_type = EHEA_EQ;
	struct ehea_qp_init_attr *init_attr = NULL;
	int ret = -EIO;

	/* start from a clean slate so out_free sees NULL for unset members */
	memset(pr, 0, sizeof(struct ehea_port_res));

	pr->port = port;
	spin_lock_init(&pr->send_lock);
	spin_lock_init(&pr->recv_lock);
	spin_lock_init(&pr->xmit_lock);
	spin_lock_init(&pr->netif_queue);

	pr->recv_eq = ehea_create_eq(adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 0);
	if (!pr->recv_eq) {
		ehea_error("create_eq failed (recv_eq)");
		goto out_free;
	}

	pr->send_eq = ehea_create_eq(adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 0);
	if (!pr->send_eq) {
		ehea_error("create_eq failed (send_eq)");
		goto out_free;
	}

	pr->recv_cq = ehea_create_cq(adapter, pr_cfg->max_entries_rcq,
				     pr->recv_eq->fw_handle,
				     port->logical_port_id);
	if (!pr->recv_cq) {
		ehea_error("create_cq failed (cq_recv)");
		goto out_free;
	}

	pr->send_cq = ehea_create_cq(adapter, pr_cfg->max_entries_scq,
				     pr->send_eq->fw_handle,
				     port->logical_port_id);
	if (!pr->send_cq) {
		ehea_error("create_cq failed (cq_send)");
		goto out_free;
	}

	if (netif_msg_ifup(port))
		ehea_info("Send CQ: act_nr_cqes=%d, Recv CQ: act_nr_cqes=%d",
			  pr->send_cq->attr.act_nr_of_cqes,
			  pr->recv_cq->attr.act_nr_of_cqes);

	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
	if (!init_attr) {
		ret = -ENOMEM;
		ehea_error("no mem for ehea_qp_init_attr");
		goto out_free;
	}

	init_attr->low_lat_rq1 = 1;
	init_attr->signalingtype = 1;	/* generate CQE if specified in WQE */
	init_attr->rq_count = 3;
	init_attr->qp_token = queue_token;
	init_attr->max_nr_send_wqes = pr_cfg->max_entries_sq;
	init_attr->max_nr_rwqes_rq1 = pr_cfg->max_entries_rq1;
	init_attr->max_nr_rwqes_rq2 = pr_cfg->max_entries_rq2;
	init_attr->max_nr_rwqes_rq3 = pr_cfg->max_entries_rq3;
	init_attr->wqe_size_enc_sq = EHEA_SG_SQ;
	init_attr->wqe_size_enc_rq1 = EHEA_SG_RQ1;
	init_attr->wqe_size_enc_rq2 = EHEA_SG_RQ2;
	init_attr->wqe_size_enc_rq3 = EHEA_SG_RQ3;
	init_attr->rq2_threshold = EHEA_RQ2_THRESHOLD;
	init_attr->rq3_threshold = EHEA_RQ3_THRESHOLD;
	init_attr->port_nr = port->logical_port_id;
	init_attr->send_cq_handle = pr->send_cq->fw_handle;
	init_attr->recv_cq_handle = pr->recv_cq->fw_handle;
	init_attr->aff_eq_handle = port->qp_eq->fw_handle;

	pr->qp = ehea_create_qp(adapter, adapter->pd, init_attr);
	if (!pr->qp) {
		ehea_error("create_qp failed");
		ret = -EIO;
		goto out_free;
	}

	if (netif_msg_ifup(port))
		ehea_info("QP: qp_nr=%d\n act_nr_snd_wqe=%d\n nr_rwqe_rq1=%d\n "
			  "nr_rwqe_rq2=%d\n nr_rwqe_rq3=%d", init_attr->qp_nr,
			  init_attr->act_nr_send_wqes,
			  init_attr->act_nr_rwqes_rq1,
			  init_attr->act_nr_rwqes_rq2,
			  init_attr->act_nr_rwqes_rq3);

	/*
	 * Each skb array gets one slot more than the act_nr_* count read back
	 * from init_attr; the results are OR-ed so one check covers all four.
	 */
	ret = ehea_init_q_skba(&pr->sq_skba, init_attr->act_nr_send_wqes + 1);
	ret |= ehea_init_q_skba(&pr->rq1_skba, init_attr->act_nr_rwqes_rq1 + 1);
	ret |= ehea_init_q_skba(&pr->rq2_skba, init_attr->act_nr_rwqes_rq2 + 1);
	ret |= ehea_init_q_skba(&pr->rq3_skba, init_attr->act_nr_rwqes_rq3 + 1);
	if (ret)
		goto out_free;

	pr->swqe_refill_th = init_attr->act_nr_send_wqes / 10;
	if (ehea_gen_smrs(pr) != 0) {
		ret = -EIO;
		goto out_free;
	}

	tasklet_init(&pr->send_comp_task, ehea_send_irq_tasklet,
		     (unsigned long)pr);
	atomic_set(&pr->swqe_avail, init_attr->act_nr_send_wqes - 1);

	kfree(init_attr);

	ret = 0;
	goto out;

out_free:
	/* pr was zeroed above, so members never set are still NULL here */
	kfree(init_attr);
	vfree(pr->sq_skba.arr);
	vfree(pr->rq1_skba.arr);
	vfree(pr->rq2_skba.arr);
	vfree(pr->rq3_skba.arr);
	ehea_destroy_qp(pr->qp);
	ehea_destroy_cq(pr->send_cq);
	ehea_destroy_cq(pr->recv_cq);
	ehea_destroy_eq(pr->send_eq);
	ehea_destroy_eq(pr->recv_eq);
out:
	return ret;
}

/*
 * ehea_clean_portres - tear down one port resource.
 *
 * The QP is destroyed first. Only if that returns 0 are the CQs and EQs
 * destroyed, every skb still referenced from the four skb arrays freed,
 * the arrays vfree()d and the SMRs removed.
 *
 * Returns the non-zero result of ehea_destroy_qp(), otherwise the result
 * of ehea_rem_smrs().
 */
static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
{
	int ret, i;

	ret = ehea_destroy_qp(pr->qp);

	if (!ret) {
		ehea_destroy_cq(pr->send_cq);
		ehea_destroy_cq(pr->recv_cq);
		ehea_destroy_eq(pr->send_eq);
		ehea_destroy_eq(pr->recv_eq);

		for (i = 0; i < pr->rq1_skba.len; i++)
			if (pr->rq1_skba.arr[i])
				dev_kfree_skb(pr->rq1_skba.arr[i]);

		for (i = 0; i < pr->rq2_skba.len; i++)
			if (pr->rq2_skba.arr[i])
				dev_kfree_skb(pr->rq2_skba.arr[i]);

		for (i = 0; i < pr->rq3_skba.len; i++)
			if (pr->rq3_skba.arr[i])
				dev_kfree_skb(pr->rq3_skba.arr[i]);

		for (i = 0; i < pr->sq_skba.len; i++)
			if (pr->sq_skba.arr[i])
				dev_kfree_skb(pr->sq_skba.arr[i]);

		vfree(pr->rq1_skba.arr);
		vfree(pr->rq2_skba.arr);
		vfree(pr->rq3_skba.arr);
		vfree(pr->sq_skba.arr);
		ret = ehea_rem_smrs(pr);
	}
	return ret;
}

/*
 * The write_* functions store information in swqe which is used by
 * the hardware to calculate the ip/tcp/udp checksum
 */

/* Record the first and last byte offset of the IP header within skb->data. */
static inline void write_ip_start_end(struct ehea_swqe *swqe,
				      const struct sk_buff *skb)
{
	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
	swqe->ip_end = (u8)(swqe->ip_start + skb->nh.iph->ihl * 4 - 1);
}

/*
 * Record the offset of the TCP checksum field (relative to the byte after
 * ip_end) and the offset of the packet's last byte. Relies on ip_end
 * having been written already.
 */
static inline void write_tcp_offset_end(struct ehea_swqe *swqe,
					const struct sk_buff *skb)
{
	swqe->tcp_offset =
		(u8)(swqe->ip_end + 1 + offsetof(struct tcphdr, check));

	swqe->tcp_end = (u16)skb->len - 1;
}

/*
 * UDP counterpart of write_tcp_offset_end(); it writes the same
 * tcp_offset/tcp_end fields, using the udphdr checksum offset.
 */
static inline void write_udp_offset_end(struct ehea_swqe *swqe,
					const struct sk_buff *skb)
{
	swqe->tcp_offset =
		(u8)(swqe->ip_end + 1 + offsetof(struct udphdr, check));

	swqe->tcp_end = (u16)skb->len - 1;
}

static void write_swqe2_TSO(struct sk_buff *skb,
			    struct ehea_swqe *swqe, u32 lkey)
{
	struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
	u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
	int skb_data_size = skb->len - skb->data_len;
	int headersize;
	u64 tmp_addr;

	/*
Packet is TCP with TSO enabled */
	swqe->tx_control |= EHEA_SWQE_TSO;
	swqe->mss = skb_shinfo(skb)->gso_size;
	/* copy only eth/ip/tcp headers to immediate data and
	 * the rest of skb->data to sg1entry
	 */
	headersize = ETH_HLEN + (skb->nh.iph->ihl * 4) + (skb->h.th->doff * 4);

	skb_data_size = skb->len - skb->data_len;

	if (skb_data_size >= headersize) {
		/* copy immediate data */
		memcpy(imm_data, skb->data, headersize);
		swqe->immediate_data_length = headersize;

		if (skb_data_size > headersize) {
			/* set sg1entry data */
			sg1entry->l_key = lkey;
			sg1entry->len = skb_data_size - headersize;

			tmp_addr = (u64)(skb->data + headersize);
			sg1entry->vaddr = tmp_addr;
			swqe->descriptors++;
		}
	} else
		ehea_error("cannot handle fragmented headers");
}

/*
 * write_swqe2_nonTSO - place the linear part of a non-TSO skb into the swqe.
 *
 * Up to SWQE2_MAX_IMM bytes of skb->data are copied into the immediate-data
 * area; any remaining linear bytes are described by sg1entry (using lkey)
 * and counted in swqe->descriptors.
 */
static void write_swqe2_nonTSO(struct sk_buff *skb,
			       struct ehea_swqe *swqe, u32 lkey)
{
	/* linear (non-paged) byte count of the skb */
	int skb_data_size = skb->len - skb->data_len;
	u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
	struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
	u64 tmp_addr;

	/* Packet is any nonTSO type
	 *
	 * Copy as much as possible skb->data to immediate data and
	 * the rest to sg1entry
	 */
	if (skb_data_size >= SWQE2_MAX_IMM) {
		/* copy immediate data */
		memcpy(imm_data, skb->data, SWQE2_MAX_IMM);

		swqe->immediate_data_length = SWQE2_MAX_IMM;

		if (skb_data_size > SWQE2_MAX_IMM) {
			/* copy sg1entry data */
			sg1entry->l_key = lkey;
			sg1entry->len = skb_data_size - SWQE2_MAX_IMM;
			tmp_addr = (u64)(skb->data + SWQE2_MAX_IMM);
			sg1entry->vaddr = tmp_addr;
			swqe->descriptors++;
		}
	} else {
		/* everything fits into the immediate-data area */
		memcpy(imm_data, skb->data, skb_data_size);
		swqe->immediate_data_length = skb_data_size;
	}
}

/*
 * write_swqe2_data - fill immediate data and scatter/gather entries of a
 * swqe from the skb's linear data and its page fragments.
 */
static inline void write_swqe2_data(struct sk_buff *skb,
				    struct net_device *dev,
				    struct ehea_swqe *swqe,
				    u32 lkey)
{
	struct ehea_vsgentry *sg_list, *sg1entry, *sgentry;
	skb_frag_t *frag;
	int nfrags, sg1entry_contains_frag_data, i;
	u64 tmp_addr;

	nfrags = skb_shinfo(skb)->nr_frags;
	sg1entry = &swqe->u.immdata_desc.sg_entry;
	sg_list = (struct ehea_vsgentry*)&swqe->u.immdata_desc.sg_list;
swqe->descriptors = 0; sg1entry_contains_frag_data = 0; if ((dev->features & NETIF_F_TSO) && skb_shinfo(skb)->gso_size) write_swqe2_TSO(skb, swqe, lkey); else write_swqe2_nonTSO(skb, swqe, lkey); /* write descriptors */ if (nfrags > 0) { if (swqe->descriptors == 0) { /* sg1entry not yet used */ frag = &skb_shinfo(skb)->frags[0]; /* copy sg1entry data */ sg1entry->l_key = lkey; sg1entry->len = frag->size; tmp_addr = (u64)(page_address(frag->page) + frag->page_offset); sg1entry->vaddr = tmp_addr; swqe->descriptors++; sg1entry_contains_frag_data = 1; } for (i = sg1entry_contains_frag_data; i < nfrags; i++) { frag = &skb_shinfo(skb)->frags[i]; sgentry = &sg_list[i - sg1entry_contains_frag_data]; sgentry->l_key = lkey; sgentry->len = frag->size; tmp_addr = (u64)(page_address(frag->page) + frag->page_offset); sgentry->vaddr = tmp_addr; swqe->descriptors++; } } } static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) { int ret = 0; u64 hret; u8 reg_type; /* De/Register untagged packets */ reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_UNTAGGED; hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { ehea_error("reg_dereg_bcmc failed (tagged)"); ret = -EIO; goto out_herr; } /* De/Register VLAN packets */ reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_VLANID_ALL; hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { ehea_error("reg_dereg_bcmc failed (vlan)"); ret = -EIO; } out_herr: return ret; } static int ehea_set_mac_addr(struct net_device *dev, void *sa) { struct ehea_port *port = netdev_priv(dev); struct sockaddr *mac_addr = sa; struct hcp_ehea_port_cb0 *cb0; int ret; u64 hret; if (!is_valid_ether_addr(mac_addr->sa_data)) { ret = -EADDRNOTAVAIL; goto out; } cb0 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb0) { ehea_error("no mem for cb0"); ret = -ENOMEM; goto out; } memcpy(&(cb0->port_mac_addr), 
&(mac_addr->sa_data[0]), ETH_ALEN);

	/* the 6 address bytes were copied to the start of the u64 field */
	cb0->port_mac_addr = cb0->port_mac_addr >> 16;

	hret = ehea_h_modify_ehea_port(port->adapter->handle,
				       port->logical_port_id, H_PORT_CB0,
				       EHEA_BMASK_SET(H_PORT_CB0_MAC, 1), cb0);
	if (hret != H_SUCCESS) {
		ret = -EIO;
		goto out_free;
	}

	memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len);

	/* Deregister old MAC in pHYP */
	ret = ehea_broadcast_reg_helper(port, H_DEREG_BCMC);
	if (ret)
		goto out_free;

	port->mac_addr = cb0->port_mac_addr << 16;

	/* Register new MAC in pHYP */
	ret = ehea_broadcast_reg_helper(port, H_REG_BCMC);
	if (ret)
		goto out_free;

	ret = 0;
out_free:
	kfree(cb0);
out:
	return ret;
}

/*
 * ehea_promiscuous_error - log that the hypervisor refused a promiscuous
 * mode change. An extra explanation is appended when hret is H_AUTHORITY.
 */
static void ehea_promiscuous_error(u64 hret, int enable)
{
	ehea_info("Hypervisor denied %sabling promiscuous mode.%s",
		  enable == 1 ? "en" : "dis",
		  hret != H_AUTHORITY ? "" : " Another partition owning a "
		  "logical port on the same physical port might have altered "
		  "promiscuous mode first.");
}

/*
 * ehea_promiscuous - switch promiscuous mode on or off via port CB7.
 *
 * Does nothing if port->promisc already matches the requested state.
 * port->promisc is updated only after the hypervisor accepted the change.
 */
static void ehea_promiscuous(struct net_device *dev, int enable)
{
	struct ehea_port *port = netdev_priv(dev);
	struct hcp_ehea_port_cb7 *cb7;
	u64 hret;

	if ((enable && port->promisc) || (!enable && !port->promisc))
		return;

	cb7 = kzalloc(PAGE_SIZE, GFP_ATOMIC);
	if (!cb7) {
		ehea_error("no mem for cb7");
		goto out;
	}

	/* Modify Pxs_DUCQPN in CB7 */
	cb7->def_uc_qpn = enable == 1 ?
port->port_res[0].qp->fw_handle : 0;

	hret = ehea_h_modify_ehea_port(port->adapter->handle,
				       port->logical_port_id,
				       H_PORT_CB7, H_PORT_CB7_DUCQPN, cb7);
	if (hret) {
		ehea_promiscuous_error(hret, enable);
		goto out;
	}

	port->promisc = enable;
out:
	kfree(cb7);
	return;
}

/*
 * ehea_multicast_reg_helper - register or deregister one multicast MAC.
 *
 * Issues ehea_h_reg_dereg_bcmc() for untagged frames and then for all VLAN
 * IDs; the second call is skipped if the first returns non-zero.
 *
 * Returns the hypervisor return code of the last call made.
 */
static u64 ehea_multicast_reg_helper(struct ehea_port *port, u64 mc_mac_addr,
				     u32 hcallid)
{
	u64 hret;
	u8 reg_type;

	reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST
		 | EHEA_BCMC_UNTAGGED;

	hret = ehea_h_reg_dereg_bcmc(port->adapter->handle,
				     port->logical_port_id,
				     reg_type, mc_mac_addr, 0, hcallid);
	if (hret)
		goto out;

	reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST
		 | EHEA_BCMC_VLANID_ALL;

	hret = ehea_h_reg_dereg_bcmc(port->adapter->handle,
				     port->logical_port_id,
				     reg_type, mc_mac_addr, 0, hcallid);
out:
	return hret;
}

/*
 * ehea_drop_multicast_list - deregister and free every entry of
 * port->mc_list.
 *
 * Entries are unlinked and freed even when their deregistration fails.
 * Returns 0 if every deregistration succeeded, otherwise -EIO.
 */
static int ehea_drop_multicast_list(struct net_device *dev)
{
	struct ehea_port *port = netdev_priv(dev);
	struct ehea_mc_list *mc_entry = port->mc_list;
	struct list_head *pos;
	struct list_head *temp;
	int ret = 0;
	u64 hret;

	/* _safe variant: the current node is deleted inside the loop */
	list_for_each_safe(pos, temp, &(port->mc_list->list)) {
		mc_entry = list_entry(pos, struct ehea_mc_list, list);

		hret = ehea_multicast_reg_helper(port, mc_entry->macaddr,
						 H_DEREG_BCMC);
		if (hret) {
			ehea_error("failed deregistering mcast MAC");
			ret = -EIO;
		}

		list_del(pos);
		kfree(mc_entry);
	}
	return ret;
}

/*
 * ehea_allmulti - enable or disable all-multicast reception.
 *
 * Only acts when the requested state differs from port->allmulti. Enabling
 * first drops the individual multicast list, then registers MAC 0;
 * disabling deregisters MAC 0. port->allmulti changes only on success.
 */
static void ehea_allmulti(struct net_device *dev, int enable)
{
	struct ehea_port *port = netdev_priv(dev);
	u64 hret;

	if (!port->allmulti) {
		if (enable) {
			/* Enable ALLMULTI */
			ehea_drop_multicast_list(dev);
			hret = ehea_multicast_reg_helper(port, 0, H_REG_BCMC);
			if (!hret)
				port->allmulti = 1;
			else
				ehea_error("failed enabling IFF_ALLMULTI");
		}
	} else
		if (!enable) {
			/* Disable ALLMULTI */
			hret = ehea_multicast_reg_helper(port, 0, H_DEREG_BCMC);
			if (!hret)
				port->allmulti = 0;
			else
				ehea_error("failed disabling IFF_ALLMULTI");
		}
}

/*
 * ehea_add_multicast_entry - register one multicast MAC and remember it in
 * port->mc_list.
 */
static void ehea_add_multicast_entry(struct ehea_port* port, u8* mc_mac_addr)
{
	struct ehea_mc_list *ehea_mcl_entry;
	u64 hret;

	ehea_mcl_entry =
kzalloc(sizeof(*ehea_mcl_entry), GFP_ATOMIC);
	if (!ehea_mcl_entry) {
		ehea_error("no mem for mcl_entry");
		return;
	}

	INIT_LIST_HEAD(&ehea_mcl_entry->list);

	memcpy(&ehea_mcl_entry->macaddr, mc_mac_addr, ETH_ALEN);

	hret = ehea_multicast_reg_helper(port, ehea_mcl_entry->macaddr,
					 H_REG_BCMC);
	if (!hret)
		list_add(&ehea_mcl_entry->list, &port->mc_list->list);
	else {
		/* not registered, so the entry is not kept */
		ehea_error("failed registering mcast MAC");
		kfree(ehea_mcl_entry);
	}
}

/*
 * ehea_set_multicast_list - apply the net_device's receive filter state.
 *
 * IFF_PROMISC takes precedence, then IFF_ALLMULTI. Otherwise the previously
 * registered multicast list is dropped and every entry of dev->mc_list is
 * registered again, unless dev->mc_count exceeds the adapter's max_mc_mac.
 */
static void ehea_set_multicast_list(struct net_device *dev)
{
	struct ehea_port *port = netdev_priv(dev);
	struct dev_mc_list *k_mcl_entry;
	int ret, i;

	if (dev->flags & IFF_PROMISC) {
		ehea_promiscuous(dev, 1);
		return;
	}
	ehea_promiscuous(dev, 0);

	if (dev->flags & IFF_ALLMULTI) {
		ehea_allmulti(dev, 1);
		return;
	}
	ehea_allmulti(dev, 0);

	if (dev->mc_count) {
		ret = ehea_drop_multicast_list(dev);
		if (ret) {
			/* Dropping the current multicast list failed.
			 * Enabling ALL_MULTI is the best we can do.
			 */
			ehea_allmulti(dev, 1);
		}

		if (dev->mc_count > port->adapter->max_mc_mac) {
			/* NOTE(review): the literal below is broken across a
			 * physical line exactly as in the source text. */
			ehea_info("Mcast registration limit reached (0x%lx). 
" "Use ALLMULTI!",
				  port->adapter->max_mc_mac);
			goto out;
		}

		for (i = 0, k_mcl_entry = dev->mc_list;
		     i < dev->mc_count;
		     i++, k_mcl_entry = k_mcl_entry->next) {
			ehea_add_multicast_entry(port,
						 k_mcl_entry->dmi_addr);
		}
	}
out:
	return;
}

/*
 * ehea_change_mtu - accept an MTU between 68 and EHEA_MAX_PACKET_SIZE
 * (inclusive); anything else yields -EINVAL.
 */
static int ehea_change_mtu(struct net_device *dev, int new_mtu)
{
	if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/*
 * ehea_xmit2 - build a swqe that carries immediate data plus descriptors.
 *
 * Sets the tx_control flags and checksum offsets according to the packet's
 * protocol, then lets write_swqe2_data() fill in the payload description.
 */
static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
		       struct ehea_swqe *swqe, u32 lkey)
{
	if (skb->protocol == htons(ETH_P_IP)) {
		/* IPv4 */
		swqe->tx_control |= EHEA_SWQE_CRC
				 | EHEA_SWQE_IP_CHECKSUM
				 | EHEA_SWQE_TCP_CHECKSUM
				 | EHEA_SWQE_IMM_DATA_PRESENT
				 | EHEA_SWQE_DESCRIPTORS_PRESENT;

		write_ip_start_end(swqe, skb);

		if (skb->nh.iph->protocol == IPPROTO_UDP) {
			if ((skb->nh.iph->frag_off & IP_MF)
			    || (skb->nh.iph->frag_off & IP_OFFSET))
				/* IP fragment, so don't change cs */
				swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM;
			else
				write_udp_offset_end(swqe, skb);

		} else if (skb->nh.iph->protocol == IPPROTO_TCP) {
			write_tcp_offset_end(swqe, skb);
		}

		/* icmp (big data) and ip segmentation packets (all other ip
		   packets) do not require any special handling */

	} else {
		/* Other Ethernet Protocol */
		swqe->tx_control |= EHEA_SWQE_CRC
				 | EHEA_SWQE_IMM_DATA_PRESENT
				 | EHEA_SWQE_DESCRIPTORS_PRESENT;
	}

	write_swqe2_data(skb, dev, swqe, lkey);
}

/*
 * ehea_xmit3 - build a swqe that carries the whole packet as immediate data.
 */
static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
		       struct ehea_swqe *swqe)
{
	int nfrags = skb_shinfo(skb)->nr_frags;
	u8 *imm_data = &swqe->u.immdata_nodesc.immediate_data[0];
	skb_frag_t *frag;
	int i;

	if (skb->protocol == htons(ETH_P_IP)) {
		/* IPv4 */
		write_ip_start_end(swqe, skb);

		if (skb->nh.iph->protocol == IPPROTO_TCP) {
			swqe->tx_control |= EHEA_SWQE_CRC
					 | EHEA_SWQE_IP_CHECKSUM
					 | EHEA_SWQE_TCP_CHECKSUM
					 | EHEA_SWQE_IMM_DATA_PRESENT;

			write_tcp_offset_end(swqe, skb);

		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
			if ((skb->nh.iph->frag_off & IP_MF)
			    || (skb->nh.iph->frag_off & IP_OFFSET))
				/* IP fragment, so don't change cs
*/ swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IMM_DATA_PRESENT; else { swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IP_CHECKSUM | EHEA_SWQE_TCP_CHECKSUM | EHEA_SWQE_IMM_DATA_PRESENT; write_udp_offset_end(swqe, skb); } } else { /* icmp (big data) and ip segmentation packets (all other ip packets) */ swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IP_CHECKSUM | EHEA_SWQE_IMM_DATA_PRESENT; } } else { /* Other Ethernet Protocol */ swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IMM_DATA_PRESENT; } /* copy (immediate) data */ if (nfrags == 0) { /* data is in a single piece */ memcpy(imm_data, skb->data, skb->len); } else { /* first copy data from the skb->data buffer ... */ memcpy(imm_data, skb->data, skb->len - skb->data_len); imm_data += skb->len - skb->data_len; /* ... then copy data from the fragments */ for (i = 0; i < nfrags; i++) { frag = &skb_shinfo(skb)->frags[i]; memcpy(imm_data, page_address(frag->page) + frag->page_offset, frag->size); imm_data += frag->size; } } swqe->immediate_data_length = skb->len; dev_kfree_skb(skb); } static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ehea_port *port = netdev_priv(dev); struct ehea_swqe *swqe; unsigned long flags; u32 lkey; int swqe_index; struct ehea_port_res *pr = &port->port_res[0]; spin_lock(&pr->xmit_lock); swqe = ehea_get_swqe(pr->qp, &swqe_index); memset(swqe, 0, SWQE_HEADER_SIZE); atomic_dec(&pr->swqe_avail); if (skb->len <= SWQE3_MAX_IMM) { u32 sig_iv = port->sig_comp_iv; u32 swqe_num = pr->swqe_id_counter; ehea_xmit3(skb, dev, swqe); swqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE3_TYPE) | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, swqe_num); if (pr->swqe_ll_count >= (sig_iv - 1)) { swqe->wr_id |= EHEA_BMASK_SET(EHEA_WR_ID_REFILL, sig_iv); swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; pr->swqe_ll_count = 0; } else pr->swqe_ll_count += 1; } else { swqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE2_TYPE) | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, pr->swqe_id_counter) | 
EHEA_BMASK_SET(EHEA_WR_ID_INDEX, pr->sq_skba.index); pr->sq_skba.arr[pr->sq_skba.index] = skb; pr->sq_skba.index++; pr->sq_skba.index &= (pr->sq_skba.len - 1); lkey = pr->send_mr.lkey; ehea_xmit2(skb, dev, swqe, lkey); if (pr->swqe_count >= (EHEA_SIG_IV_LONG - 1)) { swqe->wr_id |= EHEA_BMASK_SET(EHEA_WR_ID_REFILL, EHEA_SIG_IV_LONG); swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; pr->swqe_count = 0; } else pr->swqe_count += 1; } pr->swqe_id_counter += 1; if (port->vgrp && vlan_tx_tag_present(skb)) { swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; swqe->vlan_tag = vlan_tx_tag_get(skb); } if (netif_msg_tx_queued(port)) { ehea_info("post swqe on QP %d", pr->qp->init_attr.qp_nr); ehea_dump(swqe, 512, "swqe"); } ehea_post_swqe(pr->qp, swqe); pr->tx_packets++; if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) { spin_lock_irqsave(&pr->netif_queue, flags); if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) { netif_stop_queue(dev); pr->queue_stopped = 1; } spin_unlock_irqrestore(&pr->netif_queue, flags); } dev->trans_start = jiffies; spin_unlock(&pr->xmit_lock); return NETDEV_TX_OK; } static void ehea_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; struct hcp_ehea_port_cb1 *cb1; u64 hret; port->vgrp = grp; cb1 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb1) { ehea_error("no mem for cb1"); goto out; } if (grp) memset(cb1->vlan_filter, 0, sizeof(cb1->vlan_filter)); else memset(cb1->vlan_filter, 0xFF, sizeof(cb1->vlan_filter)); hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) ehea_error("modify_ehea_port failed"); kfree(cb1); out: return; } static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; struct hcp_ehea_port_cb1 *cb1; int index; u64 hret; cb1 = kzalloc(PAGE_SIZE, GFP_KERNEL); if 
(!cb1) { ehea_error("no mem for cb1"); goto out; } hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) { ehea_error("query_ehea_port failed"); goto out; } index = (vid / 64); cb1->vlan_filter[index] |= ((u64)(1 << (vid & 0x3F))); hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) ehea_error("modify_ehea_port failed"); out: kfree(cb1); return; } static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; struct hcp_ehea_port_cb1 *cb1; int index; u64 hret; if (port->vgrp) port->vgrp->vlan_devices[vid] = NULL; cb1 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb1) { ehea_error("no mem for cb1"); goto out; } hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) { ehea_error("query_ehea_port failed"); goto out; } index = (vid / 64); cb1->vlan_filter[index] &= ~((u64)(1 << (vid & 0x3F))); hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) ehea_error("modify_ehea_port failed"); out: kfree(cb1); return; } int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp) { int ret = -EIO; u64 hret; u16 dummy16 = 0; u64 dummy64 = 0; struct hcp_modify_qp_cb0* cb0; cb0 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb0) { ret = -ENOMEM; goto out; } hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); if (hret != H_SUCCESS) { ehea_error("query_ehea_qp failed (1)"); goto out; } cb0->qp_ctl_reg = H_QP_CR_STATE_INITIALIZED; hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, &dummy64, &dummy64, &dummy16, &dummy16); if (hret != H_SUCCESS) { ehea_error("modify_ehea_qp failed (1)"); 
goto out; } hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); if (hret != H_SUCCESS) { ehea_error("query_ehea_qp failed (2)"); goto out; } cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_INITIALIZED; hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, &dummy64, &dummy64, &dummy16, &dummy16); if (hret != H_SUCCESS) { ehea_error("modify_ehea_qp failed (2)"); goto out; } hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); if (hret != H_SUCCESS) { ehea_error("query_ehea_qp failed (3)"); goto out; } cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_RDY2SND; hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, &dummy64, &dummy64, &dummy16, &dummy16); if (hret != H_SUCCESS) { ehea_error("modify_ehea_qp failed (3)"); goto out; } hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); if (hret != H_SUCCESS) { ehea_error("query_ehea_qp failed (4)"); goto out; } ret = 0; out: kfree(cb0); return ret; } static int ehea_port_res_setup(struct ehea_port *port, int def_qps, int add_tx_qps) { int ret, i; struct port_res_cfg pr_cfg, pr_cfg_small_rx; enum ehea_eq_type eq_type = EHEA_EQ; port->qp_eq = ehea_create_eq(port->adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 1); if (!port->qp_eq) { ret = -EINVAL; ehea_error("ehea_create_eq failed (qp_eq)"); goto out_kill_eq; } pr_cfg.max_entries_rcq = rq1_entries + rq2_entries + rq3_entries; pr_cfg.max_entries_scq = sq_entries; pr_cfg.max_entries_sq = sq_entries; pr_cfg.max_entries_rq1 = rq1_entries; pr_cfg.max_entries_rq2 = rq2_entries; pr_cfg.max_entries_rq3 = rq3_entries; pr_cfg_small_rx.max_entries_rcq = 1; pr_cfg_small_rx.max_entries_scq = sq_entries; pr_cfg_small_rx.max_entries_sq = sq_entries; pr_cfg_small_rx.max_entries_rq1 = 1; pr_cfg_small_rx.max_entries_rq2 = 1; 
pr_cfg_small_rx.max_entries_rq3 = 1; for (i = 0; i < def_qps; i++) { ret = ehea_init_port_res(port, &port->port_res[i], &pr_cfg, i); if (ret) goto out_clean_pr; } for (i = def_qps; i < def_qps + add_tx_qps; i++) { ret = ehea_init_port_res(port, &port->port_res[i], &pr_cfg_small_rx, i); if (ret) goto out_clean_pr; } return 0; out_clean_pr: while (--i >= 0) ehea_clean_portres(port, &port->port_res[i]); out_kill_eq: ehea_destroy_eq(port->qp_eq); return ret; } static int ehea_clean_all_portres(struct ehea_port *port) { int ret = 0; int i; for(i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) ret |= ehea_clean_portres(port, &port->port_res[i]); ret |= ehea_destroy_eq(port->qp_eq); return ret; } static int ehea_up(struct net_device *dev) { int ret, i; struct ehea_port *port = netdev_priv(dev); u64 mac_addr = 0; if (port->state == EHEA_PORT_UP) return 0; ret = ehea_port_res_setup(port, port->num_def_qps, port->num_add_tx_qps); if (ret) { ehea_error("port_res_failed"); goto out; } /* Set default QP for this port */ ret = ehea_configure_port(port); if (ret) { ehea_error("ehea_configure_port failed. 
ret:%d", ret); goto out_clean_pr; } ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); if (ret) { ret = -EIO; ehea_error("out_clean_pr"); goto out_clean_pr; } mac_addr = (*(u64*)dev->dev_addr) >> 16; ret = ehea_reg_interrupts(dev); if (ret) { ehea_error("out_dereg_bc"); goto out_dereg_bc; } for(i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { ret = ehea_activate_qp(port->adapter, port->port_res[i].qp); if (ret) { ehea_error("activate_qp failed"); goto out_free_irqs; } } for(i = 0; i < port->num_def_qps; i++) { ret = ehea_fill_port_res(&port->port_res[i]); if (ret) { ehea_error("out_free_irqs"); goto out_free_irqs; } } ret = 0; port->state = EHEA_PORT_UP; goto out; out_free_irqs: ehea_free_interrupts(dev); out_dereg_bc: ehea_broadcast_reg_helper(port, H_DEREG_BCMC); out_clean_pr: ehea_clean_all_portres(port); out: return ret; } static int ehea_open(struct net_device *dev) { int ret; struct ehea_port *port = netdev_priv(dev); down(&port->port_lock); if (netif_msg_ifup(port)) ehea_info("enabling port %s", dev->name); ret = ehea_up(dev); if (!ret) netif_start_queue(dev); up(&port->port_lock); return ret; } static int ehea_down(struct net_device *dev) { int ret, i; struct ehea_port *port = netdev_priv(dev); if (port->state == EHEA_PORT_DOWN) return 0; ehea_drop_multicast_list(dev); ehea_free_interrupts(dev); for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) tasklet_kill(&port->port_res[i].send_comp_task); ehea_broadcast_reg_helper(port, H_DEREG_BCMC); ret = ehea_clean_all_portres(port); port->state = EHEA_PORT_DOWN; return ret; } static int ehea_stop(struct net_device *dev) { int ret; struct ehea_port *port = netdev_priv(dev); if (netif_msg_ifdown(port)) ehea_info("disabling port %s", dev->name); flush_workqueue(port->adapter->ehea_wq); down(&port->port_lock); netif_stop_queue(dev); ret = ehea_down(dev); up(&port->port_lock); return ret; } static void ehea_reset_port(struct work_struct *work) { int ret; struct ehea_port *port = container_of(work, 
struct ehea_port, reset_task); struct net_device *dev = port->netdev; port->resets++; down(&port->port_lock); netif_stop_queue(dev); netif_poll_disable(dev); ret = ehea_down(dev); if (ret) ehea_error("ehea_down failed. not all resources are freed"); ret = ehea_up(dev); if (ret) { ehea_error("Reset device %s failed: ret=%d", dev->name, ret); goto out; } if (netif_msg_timer(port)) ehea_info("Device %s resetted successfully", dev->name); netif_poll_enable(dev); netif_wake_queue(dev); out: up(&port->port_lock); return; } static void ehea_tx_watchdog(struct net_device *dev) { struct ehea_port *port = netdev_priv(dev); if (netif_carrier_ok(dev)) queue_work(port->adapter->ehea_wq, &port->reset_task); } int ehea_sense_adapter_attr(struct ehea_adapter *adapter) { struct hcp_query_ehea *cb; u64 hret; int ret; cb = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb) { ret = -ENOMEM; goto out; } hret = ehea_h_query_ehea(adapter->handle, cb); if (hret != H_SUCCESS) { ret = -EIO; goto out_herr; } adapter->num_ports = cb->num_ports; adapter->max_mc_mac = cb->max_mc_mac - 1; ret = 0; out_herr: kfree(cb); out: return ret; } static int ehea_setup_single_port(struct ehea_port *port, struct device_node *dn) { int ret; u64 hret; struct net_device *dev = port->netdev; struct ehea_adapter *adapter = port->adapter; struct hcp_ehea_port_cb4 *cb4; u32 *dn_log_port_id; sema_init(&port->port_lock, 1); port->state = EHEA_PORT_DOWN; port->sig_comp_iv = sq_entries / 10; if (!dn) { ehea_error("bad device node: dn=%p", dn); ret = -EINVAL; goto out; } port->of_dev_node = dn; /* Determine logical port id */ dn_log_port_id = (u32*)get_property(dn, "ibm,hea-port-no", NULL); if (!dn_log_port_id) { ehea_error("bad device node: dn_log_port_id=%p", dn_log_port_id); ret = -EINVAL; goto out; } port->logical_port_id = *dn_log_port_id; port->mc_list = kzalloc(sizeof(struct ehea_mc_list), GFP_KERNEL); if (!port->mc_list) { ret = -ENOMEM; goto out; } INIT_LIST_HEAD(&port->mc_list->list); ehea_set_portspeed(port, 
EHEA_SPEED_AUTONEG); ret = ehea_sense_port_attr(port); if (ret) goto out; /* Enable Jumbo frames */ cb4 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!cb4) { ehea_error("no mem for cb4"); } else { cb4->jumbo_frame = 1; hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB4, H_PORT_CB4_JUMBO, cb4); if (hret != H_SUCCESS) { ehea_info("Jumbo frames not activated"); } kfree(cb4); } /* initialize net_device structure */ SET_MODULE_OWNER(dev); memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); dev->open = ehea_open; dev->poll = ehea_poll; dev->weight = 64; dev->stop = ehea_stop; dev->hard_start_xmit = ehea_start_xmit; dev->get_stats = ehea_get_stats; dev->set_multicast_list = ehea_set_multicast_list; dev->set_mac_address = ehea_set_mac_addr; dev->change_mtu = ehea_change_mtu; dev->vlan_rx_register = ehea_vlan_rx_register; dev->vlan_rx_add_vid = ehea_vlan_rx_add_vid; dev->vlan_rx_kill_vid = ehea_vlan_rx_kill_vid; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER | NETIF_F_LLTX; dev->tx_timeout = &ehea_tx_watchdog; dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; INIT_WORK(&port->reset_task, ehea_reset_port); ehea_set_ethtool_ops(dev); ret = register_netdev(dev); if (ret) { ehea_error("register_netdev failed. 
ret=%d", ret); goto out_free; } port->netdev = dev; ret = 0; goto out; out_free: kfree(port->mc_list); out: return ret; } static int ehea_setup_ports(struct ehea_adapter *adapter) { int ret; int port_setup_ok = 0; struct ehea_port *port; struct device_node *dn = NULL; struct net_device *dev; int i; /* get port properties for all ports */ for (i = 0; i < adapter->num_ports; i++) { if (adapter->port[i]) continue; /* port already up and running */ /* allocate memory for the port structures */ dev = alloc_etherdev(sizeof(struct ehea_port)); if (!dev) { ehea_error("no mem for net_device"); break; } port = netdev_priv(dev); port->adapter = adapter; port->netdev = dev; adapter->port[i] = port; port->msg_enable = netif_msg_init(msg_level, EHEA_MSG_DEFAULT); dn = of_find_node_by_name(dn, "ethernet"); ret = ehea_setup_single_port(port, dn); if (ret) { /* Free mem for this port struct. The others will be processed on rollback */ free_netdev(dev); adapter->port[i] = NULL; ehea_error("eHEA port %d setup failed, ret=%d", i, ret); } } of_node_put(dn); /* Check for succesfully set up ports */ for (i = 0; i < adapter->num_ports; i++) if (adapter->port[i]) port_setup_ok++; if (port_setup_ok) ret = 0; /* At least some ports are setup correctly */ else ret = -EINVAL; return ret; } static int __devinit ehea_probe(struct ibmebus_dev *dev, const struct of_device_id *id) { struct ehea_adapter *adapter; u64 *adapter_handle; int ret; adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); if (!adapter) { ret = -ENOMEM; dev_err(&dev->ofdev.dev, "no mem for ehea_adapter\n"); goto out; } adapter_handle = (u64*)get_property(dev->ofdev.node, "ibm,hea-handle", NULL); if (!adapter_handle) { dev_err(&dev->ofdev.dev, "failed getting handle for adapter" " '%s'\n", dev->ofdev.node->full_name); ret = -ENODEV; goto out_free_ad; } adapter->handle = *adapter_handle; adapter->pd = EHEA_PD_ID; dev->ofdev.dev.driver_data = adapter; ret = ehea_reg_mr_adapter(adapter); if (ret) { dev_err(&dev->ofdev.dev, 
"reg_mr_adapter failed\n"); goto out_free_ad; } /* initialize adapter and ports */ /* get adapter properties */ ret = ehea_sense_adapter_attr(adapter); if (ret) { dev_err(&dev->ofdev.dev, "sense_adapter_attr failed: %d", ret); goto out_free_res; } dev_info(&dev->ofdev.dev, "%d eHEA ports found\n", adapter->num_ports); adapter->neq = ehea_create_eq(adapter, EHEA_NEQ, EHEA_MAX_ENTRIES_EQ, 1); if (!adapter->neq) { dev_err(&dev->ofdev.dev, "NEQ creation failed"); goto out_free_res; } tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet, (unsigned long)adapter); ret = ibmebus_request_irq(NULL, adapter->neq->attr.ist1, ehea_interrupt_neq, SA_INTERRUPT, "ehea_neq", adapter); if (ret) { dev_err(&dev->ofdev.dev, "requesting NEQ IRQ failed"); goto out_kill_eq; } adapter->ehea_wq = create_workqueue("ehea_wq"); if (!adapter->ehea_wq) goto out_free_irq; ret = ehea_setup_ports(adapter); if (ret) { dev_err(&dev->ofdev.dev, "setup_ports failed"); goto out_kill_wq; } ret = 0; goto out; out_kill_wq: destroy_workqueue(adapter->ehea_wq); out_free_irq: ibmebus_free_irq(NULL, adapter->neq->attr.ist1, adapter); out_kill_eq: ehea_destroy_eq(adapter->neq); out_free_res: ehea_h_free_resource(adapter->handle, adapter->mr.handle); out_free_ad: kfree(adapter); out: return ret; } static void ehea_shutdown_single_port(struct ehea_port *port) { unregister_netdev(port->netdev); kfree(port->mc_list); free_netdev(port->netdev); } static int __devexit ehea_remove(struct ibmebus_dev *dev) { struct ehea_adapter *adapter = dev->ofdev.dev.driver_data; u64 hret; int i; for (i = 0; i < adapter->num_ports; i++) if (adapter->port[i]) { ehea_shutdown_single_port(adapter->port[i]); adapter->port[i] = NULL; } destroy_workqueue(adapter->ehea_wq); ibmebus_free_irq(NULL, adapter->neq->attr.ist1, adapter); ehea_destroy_eq(adapter->neq); hret = ehea_h_free_resource(adapter->handle, adapter->mr.handle); if (hret) { dev_err(&dev->ofdev.dev, "free_resource_mr failed"); return -EIO; } kfree(adapter); return 0; } static 
int check_module_parm(void) { int ret = 0; if ((rq1_entries < EHEA_MIN_ENTRIES_QP) || (rq1_entries > EHEA_MAX_ENTRIES_RQ1)) { ehea_info("Bad parameter: rq1_entries"); ret = -EINVAL; } if ((rq2_entries < EHEA_MIN_ENTRIES_QP) || (rq2_entries > EHEA_MAX_ENTRIES_RQ2)) { ehea_info("Bad parameter: rq2_entries"); ret = -EINVAL; } if ((rq3_entries < EHEA_MIN_ENTRIES_QP) || (rq3_entries > EHEA_MAX_ENTRIES_RQ3)) { ehea_info("Bad parameter: rq3_entries"); ret = -EINVAL; } if ((sq_entries < EHEA_MIN_ENTRIES_QP) || (sq_entries > EHEA_MAX_ENTRIES_SQ)) { ehea_info("Bad parameter: sq_entries"); ret = -EINVAL; } return ret; } static struct of_device_id ehea_device_table[] = { { .name = "lhea", .compatible = "IBM,lhea", }, {}, }; static struct ibmebus_driver ehea_driver = { .name = "ehea", .id_table = ehea_device_table, .probe = ehea_probe, .remove = ehea_remove, }; int __init ehea_module_init(void) { int ret; printk(KERN_INFO "IBM eHEA ethernet device driver (Release %s)\n", DRV_VERSION); ret = check_module_parm(); if (ret) goto out; ret = ibmebus_register_driver(&ehea_driver); if (ret) ehea_error("failed registering eHEA device driver on ebus"); out: return ret; } static void __exit ehea_module_exit(void) { ibmebus_unregister_driver(&ehea_driver); } module_init(ehea_module_init); module_exit(ehea_module_exit);