aboutsummaryrefslogblamecommitdiffstats
path: root/arch/powerpc/Kconfig
blob: 01feed0e2a15a9bf45f48f701a36f7384f8c9adc (plain) (tree)



























                                                                 




















                              
                 






















                                                    






















                                                                           





                               
                

                           
                  












                                                                              

                      


                             

                      


                             



                      






                       











                                  
                        







                                                                             
                        


                                 
                        

                  






                                             
              

                          




























                                                                            
                                      
























































                                                                               
                                            













                                     
                                     

















                                                                 
                                  












                                                         
                        

                                 
                             




                                                                         
                        
                               
                       
                         
                             




                                              
                                        
                                  




                                     
                      
                             
                             



                                                                 
                                                       
                        
                               
                             






                                             
                             
                             




                                                                           

                                                      
                                             
                       
                         
                             
 










                                             
                                                                   


                 








                           

                                     
                           




                                           
 



                 




                            

                           







                                             


                                                


                                                      



                 










                                                                        







                                                                     




























































                                                                               
                                          

                                          
                                          
















                                                                      
                                    










































































                                                                              




                                       




                               

                                            
 
                            
                  
                        
 
                             
                  
                                     






                                      



                                 

                            
                        






                                                                              









                                                                            

                                               


                                                                             
                                                                 
















































                                                                             
                        











                                                                              




                         


                       
                                       

                 





            


              




                                                                     

                                                                                                         











                                                                             


                                       
                        






                                                                         
                               
























































































































































                                                                                     


                   
                                    

     













                                               


                               

                                      

                                     
                        







                                                                          









                                   
# For a description of the syntax of this configuration file,
# see Documentation/kbuild/kconfig-language.txt.
#

mainmenu "Linux/PowerPC Kernel Configuration"

# User-visible switch between a 32-bit and a 64-bit build; off by default.
# PPC32, 64BIT and COMPAT in this file derive their defaults from it.
config PPC64
	bool "64-bit kernel"
	default n
	help
	  This option selects whether a 32-bit or a 64-bit kernel
	  will be built.

# Hidden symbol (no prompt): y whenever PPC64 is not set.
config PPC32
	def_bool y if !PPC64

# Hidden symbol (no prompt): y whenever PPC64 is set.
config 64BIT
	def_bool y if PPC64

config PPC_MERGE
	def_bool y

# Hidden symbol (no prompt); unconditionally y.
config MMU
	def_bool y

# Hidden symbol (no prompt); unconditionally y.
config GENERIC_HARDIRQS
	def_bool y

# Declared without prompt or default, so it stays n unless something
# elsewhere selects it.  RWSEM_XCHGADD_ALGORITHM is the variant that is
# enabled in this file.
config RWSEM_GENERIC_SPINLOCK
	bool

# Hidden symbol (no prompt); unconditionally y.
config RWSEM_XCHGADD_ALGORITHM
	def_bool y

# Hidden symbol (no prompt); unconditionally y.
config GENERIC_CALIBRATE_DELAY
	def_bool y

# Hidden architecture marker (no prompt); unconditionally y.
config PPC
	def_bool y

# Hidden symbol (no prompt); unconditionally y.
config EARLY_PRINTK
	def_bool y

# Hidden symbol (no prompt): y on 64-bit (PPC64) builds only.
config COMPAT
	def_bool y if PPC64

# Hidden symbol: y automatically once both COMPAT and SYSVIPC are set.
config SYSVIPC_COMPAT
	def_bool y
	depends on COMPAT && SYSVIPC

# Every PPC32 platform reaches the generic nvram driver through ppc_md,
# so the symbol is switched on for all 32-bit builds.
config GENERIC_NVRAM
	def_bool y if PPC32

# Hidden symbol (no prompt); unconditionally y.
config SCHED_NO_NO_OMIT_FRAME_POINTER
	def_bool y

# Hidden symbol (no prompt); unconditionally y.
config ARCH_MAY_HAVE_PC_FDC
	def_bool y

config PPC_OF
	def_bool y

# Hidden helper: off unless a platform selects it (PPC_PSERIES, PPC_CHRP,
# PPC_PREP, PPC_MAPLE and PPC_CELL in this file do).
config PPC_UDBG_16550
	def_bool n

# Offered only on multiplatform builds with EXPERIMENTAL enabled.
config CRASH_DUMP
	bool "kernel crash dumps (EXPERIMENTAL)"
	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
	---help---
	  Build a kernel suitable for use as a kdump capture kernel.
	  The kernel will be linked at a different address than normal, and
	  so can only be used for Kdump.

	  Don't change this unless you know what you are doing.

# Hidden symbol: y on 32-bit SMP builds; otherwise n unless selected
# (PPC_PMAC64 and PPC_MAPLE select it).
config GENERIC_TBSYNC
	def_bool y if PPC32 && SMP
	default n

menu "Processor support"
# Picks the single 32-bit CPU family the kernel is built for.  A choice
# default has to name a member of the choice: CLASSIC32 is the
# "6xx/7xx/74xx" entry, whereas the 6xx symbol it selects is declared
# outside the choice and therefore cannot serve as the default.
choice
	prompt "Processor Type"
	depends on PPC32
	default CLASSIC32

config CLASSIC32
	bool "6xx/7xx/74xx"
	select PPC_FPU
	select 6xx
	help
	  There are four families of PowerPC chips supported.  The more common
	  types (601, 603, 604, 740, 750, 7400), the Motorola embedded
	  versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
	  embedded versions (403 and 405) and the high end 64 bit Power
	  processors (POWER 3, POWER4, and IBM PPC970 also known as G5).

	  Unless you are building a kernel for one of the embedded processor
	  systems, 64 bit IBM RS/6000 or an Apple G5, choose 6xx.
	  Note that the kernel runs in 32-bit mode even on 64-bit chips.

config PPC_52xx
	bool "Freescale 52xx"
	select 6xx
	select PPC_FPU

config PPC_82xx
	bool "Freescale 82xx"
	select 6xx
	select PPC_FPU

config PPC_83xx
	bool "Freescale 83xx"
	select 6xx
	select FSL_SOC
	select 83xx
	select PPC_FPU

config 40x
	bool "AMCC 40x"

config 44x
	bool "AMCC 44x"

config 8xx
	bool "Freescale 8xx"

config E200
	bool "Freescale e200"

config E500
	bool "Freescale e500"
endchoice

config POWER4_ONLY
	bool "Optimize for POWER4"
	depends on PPC64
	default n
	---help---
	  Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
	  The resulting binary will not work on POWER3 or RS64 processors
	  when compiled with binutils 2.15 or later.

# Hidden symbol: y on 64-bit builds unless POWER4_ONLY has been chosen.
config POWER3
	def_bool y if !POWER4_ONLY
	depends on PPC64

# Hidden symbol: y on every 64-bit (PPC64) build.
config POWER4
	def_bool y
	depends on PPC64

config 6xx
	bool

# this is temp to handle compat with arch=ppc
config 83xx
	bool

# Hidden symbol: y on PPC64; on 32-bit it is set only through select
# (CLASSIC32, PPC_52xx, PPC_82xx and PPC_83xx select it).
config PPC_FPU
	def_bool y if PPC64

# Hidden symbol: y whenever the E200 or E500 processor type is chosen.
config BOOKE
	def_bool y
	depends on E200 || E500

# Hidden symbol: y whenever the E200 or E500 processor type is chosen.
config FSL_BOOKE
	def_bool y
	depends on E200 || E500

# Hidden symbol: always y on 44x; on E500 it follows PHYS_64BIT.
config PTE_64BIT
	def_bool y if 44x || (E500 && PHYS_64BIT)
	depends on 44x || E500

# The prompt is shown only on E500; on 44x there is no prompt and the
# default is y.
config PHYS_64BIT
	bool 'Large physical address support' if E500
	depends on 44x || E500
	default y if 44x
	---help---
	  This option enables kernel support for larger than 32-bit physical
	  addresses.  This feature is not available on all e500 cores.

	  If in doubt, say N here.

# Offered for the classic 32-bit processor type and for POWER4 builds.
config ALTIVEC
	bool "AltiVec Support"
	depends on CLASSIC32 || POWER4
	---help---
	  This option enables kernel support for the Altivec extensions to the
	  PowerPC processor. The kernel currently supports saving and restoring
	  altivec registers, and turning on the 'altivec enable' bit so user
	  processes can execute altivec instructions.

	  This option is only useful if you have a processor that supports
	  altivec (G4, otherwise known as 74xx series), but does not have
	  any effect on a non-altivec cpu (it does, however add code to the
	  kernel).

	  If in doubt, say Y here.

config SPE
	bool "SPE Support"
	depends on E200 || E500
	---help---
	  This option enables kernel support for the Signal Processing
	  Extensions (SPE) to the PowerPC processor. The kernel currently
	  supports saving and restoring SPE registers, and turning on the
	  'spe enable' bit so user processes can execute SPE instructions.

	  This option is only useful if you have a processor that supports
	  SPE (e500, otherwise known as 85xx series), but does not have any
	  effect on a non-spe cpu (it does, however add code to the kernel).

	  If in doubt, say Y here.

# Hidden symbol: y for 6xx, POWER3, POWER4 and any PPC64 build.
config PPC_STD_MMU
	def_bool y
	depends on 6xx || POWER3 || POWER4 || PPC64

config PPC_STD_MMU_32
	def_bool y
	depends on PPC_STD_MMU && PPC32

config SMP
	depends on PPC_STD_MMU
	bool "Symmetric multi-processing support"
	---help---
	  This enables support for systems with more than one CPU. If you have
	  a system with only one CPU, say N. If you have a system with more
	  than one CPU, say Y.  Note that the kernel does not currently
	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
	  since they have inadequate hardware support for multiprocessor
	  operation.

	  If you say N here, the kernel will run on single and multiprocessor
	  machines, but will use only one CPU of a multiprocessor machine. If
	  you say Y here, the kernel will run on single-processor machines.
	  On a single-processor machine, the kernel will run faster if you say
	  N here.

	  If you don't know what to do here, say N.

config NR_CPUS
	int "Maximum number of CPUs (2-128)"
	range 2 128
	depends on SMP
	default "32" if PPC64
	default "4"

# Hidden symbol: y on 4xx, 8xx and E200.  The consistent-memory pool
# options in the "Advanced setup" menu depend on it.
config NOT_COHERENT_CACHE
	def_bool y
	depends on 4xx || 8xx || E200
endmenu

source "init/Kconfig"

menu "Platform support"
	depends on PPC64 || CLASSIC32

choice
	prompt "Machine type"
	default PPC_MULTIPLATFORM

config PPC_MULTIPLATFORM
	bool "Generic desktop/server/laptop"
	help
	  Select this option if configuring for an IBM pSeries or
	  RS/6000 machine, an Apple machine, or a PReP, CHRP,
	  Maple or Cell-based machine.

config PPC_ISERIES
	bool "IBM Legacy iSeries"
	depends on PPC64

config EMBEDDED6xx
	bool "Embedded 6xx/7xx/7xxx-based board"
	depends on PPC32 && BROKEN

config APUS
	bool "Amiga-APUS"
	depends on PPC32 && BROKEN
	help
	  Select APUS if configuring for a PowerUP Amiga.
	  More information is available at:
	  <http://linux-apus.sourceforge.net/>.
endchoice

# 64-bit multiplatform option, on by default; pulls in the i8259, RTAS,
# RTAS error logging and 16550 udbg helper symbols.
config PPC_PSERIES
	bool "  IBM pSeries & new (POWER5-based) iSeries"
	depends on PPC_MULTIPLATFORM && PPC64
	default y
	select PPC_I8259
	select PPC_RTAS
	select RTAS_ERROR_LOGGING
	select PPC_UDBG_16550

config PPC_CHRP
	bool "  Common Hardware Reference Platform (CHRP) based machines"
	depends on PPC_MULTIPLATFORM && PPC32
	select PPC_I8259
	select PPC_INDIRECT_PCI
	select PPC_RTAS
	select PPC_MPC106
	select PPC_UDBG_16550
	default y

config PPC_PMAC
	bool "  Apple PowerMac based machines"
	depends on PPC_MULTIPLATFORM
	select PPC_INDIRECT_PCI if PPC32
	select PPC_MPC106 if PPC32
	default y

# Hidden symbol: y when PowerMac support is built for POWER4; selects
# the U3 DART, the U3 MPIC workaround and generic timebase sync symbols.
config PPC_PMAC64
	def_bool y
	depends on PPC_PMAC && POWER4
	select U3_DART
	select MPIC_BROKEN_U3
	select GENERIC_TBSYNC

config PPC_PREP
	bool "  PowerPC Reference Platform (PReP) based machines"
	depends on PPC_MULTIPLATFORM && PPC32 && BROKEN
	select PPC_I8259
	select PPC_INDIRECT_PCI
	select PPC_UDBG_16550
	default y

# 64-bit multiplatform option, off by default.
config PPC_MAPLE
	bool "  Maple 970FX Evaluation Board"
	depends on PPC_MULTIPLATFORM && PPC64
	select U3_DART
	select MPIC_BROKEN_U3
	select GENERIC_TBSYNC
	select PPC_UDBG_16550
	default n
	help
	  This option enables support for the Maple 970FX Evaluation Board.
	  For more information, refer to <http://www.970eval.com>

config PPC_CELL
	bool "  Cell Broadband Processor Architecture"
	depends on PPC_MULTIPLATFORM && PPC64
	select PPC_RTAS
	select MMIO_NVRAM
	select PPC_UDBG_16550

# Hidden symbol: y whenever PPC_PSERIES is enabled.
config XICS
	def_bool y
	depends on PPC_PSERIES

# Hidden helper: n by default; PPC_PMAC64 and PPC_MAPLE select it.
config U3_DART
	def_bool n
	depends on PPC_MULTIPLATFORM && PPC64

# Hidden symbol: y when any of pSeries, PowerMac, Maple or CHRP support
# is enabled.
config MPIC
	def_bool y
	depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE || PPC_CHRP

# Hidden helper: n by default; PPC_PSERIES, PPC_CHRP and PPC_CELL
# select it.
config PPC_RTAS
	def_bool n

# Hidden helper: n by default; PPC_PSERIES selects it.
config RTAS_ERROR_LOGGING
	def_bool n
	depends on PPC_RTAS

config RTAS_PROC
	bool "Proc interface to RTAS"
	depends on PPC_RTAS
	default y

config RTAS_FLASH
	tristate "Firmware flash interface"
	depends on PPC64 && RTAS_PROC

# Hidden helper: n by default; PPC_CELL selects it.
config MMIO_NVRAM
	def_bool n

# Hidden symbol: y by default under PPC_MAPLE.  PPC_MAPLE and PPC_PMAC64
# both select it as well.
# NOTE(review): the dependency names only PPC_MAPLE although PPC_PMAC64
# also selects this symbol -- confirm that is intended.
config MPIC_BROKEN_U3
	def_bool y
	depends on PPC_MAPLE

# Hidden symbol: y whenever PPC_CELL is enabled.
config CELL_IIC
	def_bool y
	depends on PPC_CELL

# Hidden symbol: y on pSeries and legacy iSeries builds.
config IBMVIO
	def_bool y
	depends on PPC_PSERIES || PPC_ISERIES

config IBMEBUS
	depends on PPC_PSERIES
	bool "Support for GX bus based adapters"
	help
	  Bus device driver for GX bus based adapters.

# Hidden helper: n by default; PPC_CHRP selects it, as does PPC_PMAC on
# PPC32.
config PPC_MPC106
	def_bool n

source "drivers/cpufreq/Kconfig"

config CPU_FREQ_PMAC
	bool "Support for Apple PowerBooks"
	depends on CPU_FREQ && ADB_PMU && PPC32
	select CPU_FREQ_TABLE
	help
	  This adds support for frequency switching on Apple PowerBooks,
	  this currently includes some models of iBook & Titanium
	  PowerBook.

config CPU_FREQ_PMAC64
	bool "Support for some Apple G5s"
	depends on CPU_FREQ && PMAC_SMU && PPC64
	select CPU_FREQ_TABLE
	help
	  This adds support for frequency switching on Apple iMac G5,
	  and some of the more recent desktop G5 machines as well.

config PPC601_SYNC_FIX
	bool "Workarounds for PPC601 bugs"
	depends on 6xx && (PPC_PREP || PPC_PMAC)
	help
	  Some versions of the PPC601 (the first PowerPC chip) have bugs which
	  mean that extra synchronization instructions are required near
	  certain instructions, typically those that make major changes to the
	  CPU state.  These extra instructions reduce performance slightly.
	  If you say N here, these extra instructions will not be included,
	  resulting in a kernel which will run faster but may not run at all
	  on some systems with the PPC601 chip.

	  If in doubt, say Y here.

config TAU
	bool "Thermal Management Support"
	depends on 6xx
	help
	  G3 and G4 processors have an on-chip temperature sensor called the
	  'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die
	  temperature within 2-4 degrees Celsius. This option shows the current
	  on-die temperature in /proc/cpuinfo if the cpu supports it.

	  Unfortunately, on some chip revisions, this sensor is very inaccurate
	  and in some cases, does not work at all, so don't assume the cpu
	  temp is actually what /proc/cpuinfo says it is.

# Sub-option of TAU; leaving it off keeps the timer-based polling
# described in the help text.
config TAU_INT
	bool "Interrupt driven TAU driver (DANGEROUS)"
	depends on TAU
	---help---
	  The TAU supports an interrupt driven mode which causes an interrupt
	  whenever the temperature goes out of range. This is the fastest way
	  to get notified the temp has exceeded a range. With this option off,
	  a timer is used to re-check the temperature periodically.

	  However, on some cpus it appears that the TAU interrupt hardware
	  is buggy and can cause a situation which would lead to unexplained
	  hard lockups.

	  Unless you are extending the TAU driver, or enjoy kernel/hardware
	  debugging, leave this option off.

config TAU_AVERAGE
	bool "Average high and low temp"
	depends on TAU
	---help---
	  The TAU hardware can compare the temperature to an upper and lower
	  bound.  The default behavior is to show both the upper and lower
	  bound in /proc/cpuinfo. If the range is large, the temperature is
	  either changing a lot, or the TAU hardware is broken (likely on some
	  G4's). If the range is small (around 4 degrees), the temperature is
	  relatively stable.  If you say Y here, a single temperature value,
	  halfway between the upper and lower bounds, will be reported in
	  /proc/cpuinfo.

	  If in doubt, say N here.
endmenu

# Per-platform option files, written with the quoted path form used by
# the other source statements in this file.
source "arch/powerpc/platforms/embedded6xx/Kconfig"
source "arch/powerpc/platforms/4xx/Kconfig"
source "arch/powerpc/platforms/83xx/Kconfig"
source "arch/powerpc/platforms/85xx/Kconfig"
source "arch/powerpc/platforms/8xx/Kconfig"
source "arch/powerpc/platforms/cell/Kconfig"

menu "Kernel options"

config HIGHMEM
	bool "High memory support"
	depends on PPC32

# Option files shared with other architectures (timer frequency,
# preemption model, binary formats).
source "kernel/Kconfig.hz"
source "kernel/Kconfig.preempt"
source "fs/Kconfig.binfmt"

# We optimistically allocate largepages from the VM, so make the limit
# large enough (16MB). This badly named config option is actually
# max order + 1
config FORCE_MAX_ZONEORDER
	int
	depends on PPC64
	default "9" if PPC_64K_PAGES
	default "13"

config MATH_EMULATION
	bool "Math emulation"
	depends on 4xx || 8xx || E200 || E500
	---help---
	  Some PowerPC chips designed for embedded applications do not have
	  a floating-point unit and therefore do not implement the
	  floating-point instructions in the PowerPC instruction set.  If you
	  say Y here, the kernel will include code to emulate a floating-point
	  unit, which will allow programs that use floating-point
	  instructions to run.

config IOMMU_VMERGE
	bool "Enable IOMMU virtual merging (EXPERIMENTAL)"
	depends on EXPERIMENTAL && PPC64
	default n
	help
	  Cause IO segments sent to a device for DMA to be merged virtually
	  by the IOMMU when they happen to have been allocated contiguously.
	  This doesn't add pressure to the IOMMU allocator. However, some
	  drivers don't support getting large merged segments coming back
	  from *_map_sg(). Say Y if you know the drivers you are using are
	  properly handling this case.

config HOTPLUG_CPU
	bool "Support for enabling/disabling CPUs"
	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
	---help---
	  Say Y here to be able to disable and re-enable individual
	  CPUs at runtime on SMP machines.

	  Say N if you are unsure.

# Offered only on multiplatform builds with EXPERIMENTAL enabled.
config KEXEC
	bool "kexec system call (EXPERIMENTAL)"
	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
	help
	  kexec is a system call that implements the ability to shutdown your
	  current kernel, and to start another kernel.  It is like a reboot
	  but it is independent of the system firmware.   And like a reboot
	  you can start any kernel with it, not just Linux.

	  The name comes from the similarity to the exec system call.

	  It is an ongoing process to be certain the hardware in a machine
	  is properly shutdown, so do not be surprised if this code does not
	  initially work for you.  It may help to enable device hotplugging
	  support.  As of this writing the exact hardware interface is
	  strongly in flux, so no good recommendation can be made.

# Hidden symbol: y on 8xx and 8260.
config EMBEDDEDBOOT
	def_bool y
	depends on 8xx || 8260

config PC_KEYBOARD
	bool "PC PS/2 style Keyboard"
	depends on 4xx || CPM2

config PPCBUG_NVRAM
	bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC
	default y if PPC_PREP

config IRQ_ALL_CPUS
	bool "Distribute interrupts on all CPUs by default"
	depends on SMP && !MV64360
	help
	  This option gives the kernel permission to distribute IRQs across
	  multiple CPUs.  Saying N here will route all IRQs to the first
	  CPU.  Generally saying Y is safe, although some problems have been
	  reported with SMP Power Macintoshes with this option enabled.

source "arch/powerpc/platforms/pseries/Kconfig"

config NUMA
	bool "NUMA support"
	depends on PPC64
	default y if SMP && PPC_PSERIES

config ARCH_SELECT_MEMORY_MODEL
	def_bool y
	depends on PPC64

config ARCH_FLATMEM_ENABLE
	def_bool y
	depends on (PPC64 && !NUMA) || PPC32

config ARCH_SPARSEMEM_ENABLE
	def_bool y
	depends on PPC64

config ARCH_SPARSEMEM_DEFAULT
	def_bool y
	depends on SMP && PPC_PSERIES

source "mm/Kconfig"

config HAVE_ARCH_EARLY_PFN_TO_NID
	def_bool y
	depends on NEED_MULTIPLE_NODES

config ARCH_MEMORY_PROBE
	def_bool y
	depends on MEMORY_HOTPLUG

# 64-bit only.  FORCE_MAX_ZONEORDER in this file picks a different
# default when this option is enabled.
config PPC_64K_PAGES
	bool "64k page size"
	depends on PPC64
	help
	  This option changes the kernel logical page size to 64k. On machines
	  without processor support for 64k pages, the kernel will simulate
	  them by loading each individual 4k page on demand transparently,
	  while on hardware with such support, it will be used to map
	  normal application pages.

# Off by default.  The default must be spelled "n": "off" is not a
# tristate value and would be read as a reference to an undefined symbol.
config SCHED_SMT
	bool "SMT (Hyperthreading) scheduler support"
	depends on PPC64 && SMP
	default n
	help
	  SMT scheduler support improves the CPU scheduler's decision making
	  when dealing with POWER5 cpus at a cost of slightly increased
	  overhead in some places. If unsure say N here.

config PROC_DEVICETREE
	bool "Support for device tree in /proc"
	depends on PROC_FS
	help
	  This option adds a device-tree directory under /proc which contains
	  an image of the device tree that the kernel copies from Open
	  Firmware or other boot firmware. If unsure, say Y here.

source "arch/powerpc/platforms/prep/Kconfig"

config CMDLINE_BOOL
	bool "Default bootloader kernel arguments"
	depends on !PPC_ISERIES

config CMDLINE
	string "Initial kernel command string"
	depends on CMDLINE_BOOL
	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
	help
	  On some platforms, there is currently no way for the boot loader to
	  pass arguments to the kernel. For these platforms, you can supply
	  some command-line options at build time by entering them here.  In
	  most cases you will need to specify the root device here.

# The power-management options are pulled in on everything except 44x;
# on 44x they additionally require BROKEN.
if !44x || BROKEN
source "kernel/power/Kconfig"
endif

config SECCOMP
	bool "Enable seccomp to safely compute untrusted bytecode"
	depends on PROC_FS
	default y
	help
	  This kernel feature is useful for number crunching applications
	  that may need to compute untrusted bytecode during their
	  execution. By using pipes or other transports made available to
	  the process as file descriptors supporting the read/write
	  syscalls, it's possible to isolate those applications in
	  their own address space using seccomp. Once seccomp is
	  enabled via /proc/<pid>/seccomp, it cannot be disabled
	  and the task is only allowed to execute a few safe syscalls
	  defined by each seccomp mode.

	  If unsure, say Y. Only embedded should say N here.

endmenu

# Hidden symbol (no prompt); unconditionally y.
config ISA_DMA_API
	def_bool y

menu "Bus options"

config ISA
	bool "Support for ISA-bus hardware"
	depends on PPC_PREP || PPC_CHRP
	select PPC_I8259
	help
	  Find out whether you have ISA slots on your motherboard.  ISA is the
	  name of a bus system, i.e. the way the CPU talks to the other stuff
	  inside your box.  If you have an Apple machine, say N here; if you
	  have an IBM RS/6000 or pSeries machine or a PReP machine, say Y.  If
	  you have an embedded board, consult your board documentation.

# Hidden symbol.  In Kconfig expressions && binds tighter than ||, so the
# !CPM2 restriction applies to the 6xx case only; the parentheses make
# that grouping explicit.
config GENERIC_ISA_DMA
	def_bool y
	depends on PPC64 || POWER4 || (6xx && !CPM2)

# Hidden helper: y by default on 85xx; elsewhere n unless selected
# (PPC_PSERIES, PPC_CHRP, PPC_PREP, ISA and PCI_QSPAN select it).
config PPC_I8259
	def_bool y if 85xx
	default n

# Hidden helper: y by default on 40x, 44x and 85xx when PCI is enabled;
# otherwise n unless a platform selects it.
config PPC_INDIRECT_PCI
	def_bool y if 40x || 44x || 85xx
	depends on PCI
	default n

config EISA
	bool

config SBUS
	bool

config FSL_SOC
	bool

# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
config MCA
	bool

# The prompt appears only for the configurations listed after "if"; for
# the rest the value comes from the default lines below.
# NOTE(review): the prompt condition tests PPC_MPC52xx, while the
# processor choice in this file declares PPC_52xx -- confirm PPC_MPC52xx
# is defined in one of the sourced Kconfig files.
config PCI
	bool "PCI support" if 40x || CPM2 || PPC_83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx && !85xx
	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
	default PCI_QSPAN if !4xx && !CPM2 && 8xx
	help
	  Find out whether your system includes a PCI bus. PCI is the name of
	  a bus system, i.e. the way the CPU talks to the other stuff inside
	  your box.  If you say Y here, the kernel will include drivers and
	  infrastructure code to support PCI bus devices.

# Hidden symbol that simply follows the value of PCI.
config PCI_DOMAINS
	def_bool PCI

config PCI_QSPAN
	bool "QSpan PCI"
	depends on !4xx && !CPM2 && 8xx
	select PPC_I8259
	help
	  Say Y here if you have a system based on a Motorola 8xx-series
	  embedded processor with a QSPAN PCI interface, otherwise say N.

# Hidden symbol: y when PCI is enabled on 8260; selects the indirect PCI
# helper.
config PCI_8260
	def_bool y
	depends on PCI && 8260
	select PPC_INDIRECT_PCI

config 8260_PCI9
	bool "  Enable workaround for MPC826x erratum PCI 9"
	depends on PCI_8260 && !ADS8272
	default y

choice
	prompt "  IDMA channel for PCI 9 workaround"
	depends on 8260_PCI9

config 8260_PCI9_IDMA1
	bool "IDMA1"

config 8260_PCI9_IDMA2
	bool "IDMA2"

config 8260_PCI9_IDMA3
	bool "IDMA3"

config 8260_PCI9_IDMA4
	bool "IDMA4"

endchoice

source "drivers/pci/Kconfig"

source "drivers/pcmcia/Kconfig"

source "drivers/pci/hotplug/Kconfig"

endmenu

menu "Advanced setup"
	depends on PPC32

config ADVANCED_OPTIONS
	bool "Prompt for advanced kernel configuration options"
	help
	  This option will enable prompting for a variety of advanced kernel
	  configuration options.  These options can cause the kernel to not
	  work if they are set incorrectly, but can be used to optimize certain
	  aspects of kernel memory management.

	  Unless you know what you are doing, say N here.

comment "Default settings for advanced configuration options are used"
	depends on !ADVANCED_OPTIONS

config HIGHMEM_START_BOOL
	bool "Set high memory pool address"
	depends on ADVANCED_OPTIONS && HIGHMEM
	help
	  This option allows you to set the base address of the kernel virtual
	  area used to map high memory pages.  This can be useful in
	  optimizing the layout of kernel virtual memory.

	  Say N here unless you know what you are doing.

config HIGHMEM_START
	hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
	default "0xfe000000"

config LOWMEM_SIZE_BOOL
	bool "Set maximum low memory"
	depends on ADVANCED_OPTIONS
	help
	  This option allows you to set the maximum amount of memory which
	  will be used as "low memory", that is, memory which the kernel can
	  access directly, without having to set up a kernel virtual mapping.
	  This can be useful in optimizing the layout of kernel virtual
	  memory.

	  Say N here unless you know what you are doing.

config LOWMEM_SIZE
	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
	default "0x30000000"

config KERNEL_START_BOOL
	bool "Set custom kernel base address"
	depends on ADVANCED_OPTIONS
	help
	  This option allows you to set the kernel virtual address at which
	  the kernel will map low memory (the kernel image will be linked at
	  this address).  This can be useful in optimizing the virtual memory
	  layout of the system.

	  Say N here unless you know what you are doing.

config KERNEL_START
	hex "Virtual address of kernel base" if KERNEL_START_BOOL
	default "0xc0000000"

config TASK_SIZE_BOOL
	bool "Set custom user task size"
	depends on ADVANCED_OPTIONS
	help
	  This option allows you to set the amount of virtual address space
	  allocated to user tasks.  This can be useful in optimizing the
	  virtual memory layout of the system.

	  Say N here unless you know what you are doing.

config TASK_SIZE
	hex "Size of user task space" if TASK_SIZE_BOOL
	default "0x80000000"

# Gates the prompt of CONSISTENT_START; only offered with advanced
# options on a non-coherent-cache configuration.
config CONSISTENT_START_BOOL
	bool "Set custom consistent memory pool address"
	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
	help
	  This option allows you to set the base virtual address
	  of the consistent memory pool.  This pool of virtual
	  memory is used to make consistent memory allocations.

config CONSISTENT_START
	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
	default "0xff100000" if NOT_COHERENT_CACHE

# Gates the prompt of CONSISTENT_SIZE; only offered with advanced
# options on a non-coherent-cache configuration.
config CONSISTENT_SIZE_BOOL
	bool "Set custom consistent memory pool size"
	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
	help
	  This option allows you to set the size of the
	  consistent memory pool.  This pool of virtual memory
	  is used to make consistent memory allocations.

config CONSISTENT_SIZE
	hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
	default "0x00200000" if NOT_COHERENT_CACHE

config BOOT_LOAD_BOOL
	bool "Set the boot link/load address"
	depends on ADVANCED_OPTIONS && !PPC_MULTIPLATFORM
	help
	  This option allows you to set the initial load address of the zImage
	  or zImage.initrd file.  This can be useful if you are on a board
	  which has a small amount of memory.

	  Say N here unless you know what you are doing.

config BOOT_LOAD
	hex "Link/load address for booting" if BOOT_LOAD_BOOL
	default "0x00400000" if 40x || 8xx || 8260
	default "0x01000000" if 44x
	default "0x00800000"

config PIN_TLB
	bool "Pinned Kernel TLBs (860 ONLY)"
	depends on ADVANCED_OPTIONS && 8xx
endmenu

# The prompted KERNEL_START lives in the "Advanced setup" menu, which
# depends on PPC32.  This second definition supplies the value for PPC64
# builds; it has no prompt, so the value is fixed.
if PPC64
config KERNEL_START
	hex
	default "0xc000000000000000"
endif

source "net/Kconfig"

source "drivers/Kconfig"

source "fs/Kconfig"

# XXX source "arch/ppc/8xx_io/Kconfig"

# XXX source "arch/ppc/8260_io/Kconfig"

source "arch/powerpc/platforms/iseries/Kconfig"

source "lib/Kconfig"

# Sources the oprofile options and defines KPROBES; the whole menu is
# hidden unless EXPERIMENTAL is set.
menu "Instrumentation Support"
	depends on EXPERIMENTAL

source "arch/powerpc/oprofile/Kconfig"

config KPROBES
	bool "Kprobes (EXPERIMENTAL)"
	depends on PPC64
	---help---
	  Kprobes allows you to trap at almost any kernel address and
	  execute a callback function.  register_kprobe() establishes
	  a probepoint and specifies the callback.  Kprobes is useful
	  for kernel debugging, non-intrusive instrumentation and testing.
	  If in doubt, say "N".
endmenu

source "arch/powerpc/Kconfig.debug"

source "security/Kconfig"

# Hidden symbol: y automatically once both COMPAT and KEYS are set.
config KEYS_COMPAT
	def_bool y
	depends on COMPAT && KEYS

source "crypto/Kconfig"
*/ if (!len) { restore_term(); return false; } /* Everything went OK! */ return true; } /* Handling console output is much simpler than input. */ static u32 handle_console_output(int fd, const struct iovec *iov, unsigned num, struct device*dev) { /* Whatever the Guest sends, write it to standard output. Return the * number of bytes written. */ return writev(STDOUT_FILENO, iov, num); } /* Guest->Host network output is also pretty easy. */ static u32 handle_tun_output(int fd, const struct iovec *iov, unsigned num, struct device *dev) { /* We put a flag in the "priv" pointer of the network device, and set * it as soon as we see output. We'll see why in handle_tun_input() */ *(bool *)dev->priv = true; /* Whatever packet the Guest sent us, write it out to the tun * device. */ return writev(dev->fd, iov, num); } /* This matches the peer_key() in lguest_net.c. The key for any given slot * is the address of the network device's page plus 4 * the slot number. */ static unsigned long peer_offset(unsigned int peernum) { return 4 * peernum; } /* This is where we handle a packet coming in from the tun device */ static bool handle_tun_input(int fd, struct device *dev) { u32 irq = 0, *lenp; int len; unsigned num; struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; /* First we get a buffer the Guest has bound to its key. */ lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num, &irq); if (!lenp) { /* Now, it's expected that if we try to send a packet too * early, the Guest won't be ready yet. This is why we set a * flag when the Guest sends its first packet. If it's sent a * packet we assume it should be ready to receive them. * * Actually, this is what the status bits in the descriptor are * for: we should *use* them. FIXME! */ if (*(bool *)dev->priv) warn("network: no dma buffer!"); discard_iovec(iov, &num); } /* Read the packet from the device directly into the Guest's buffer. 
*/ len = readv(dev->fd, iov, num); if (len <= 0) err(1, "reading network"); /* Write the used_len, and trigger the interrupt for the Guest */ if (lenp) { *lenp = len; trigger_irq(fd, irq); } verbose("tun input packet len %i [%02x %02x] (%s)\n", len, ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1], lenp ? "sent" : "discarded"); /* All good. */ return true; } /* The last device handling routine is block output: the Guest has sent a DMA * to the block device. It will have placed the command it wants in the * "struct lguest_block_page". */ static u32 handle_block_output(int fd, const struct iovec *iov, unsigned num, struct device *dev) { struct lguest_block_page *p = dev->mem; u32 irq, *lenp; unsigned int len, reply_num; struct iovec reply[LGUEST_MAX_DMA_SECTIONS]; off64_t device_len, off = (off64_t)p->sector * 512; /* First we extract the device length from the dev->priv pointer. */ device_len = *(off64_t *)dev->priv; /* We first check that the read or write is within the length of the * block file. */ if (off >= device_len) err(1, "Bad offset %llu vs %llu", off, device_len); /* Move to the right location in the block file. This shouldn't fail, * but best to check. */ if (lseek64(dev->fd, off, SEEK_SET) != off) err(1, "Bad seek to sector %i", p->sector); verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off); /* They were supposed to bind a reply buffer at key equal to the start * of the block device memory. We need this to tell them when the * request is finished. */ lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq); if (!lenp) err(1, "Block request didn't give us a dma buffer"); if (p->type) { /* A write request. The DMA they sent contained the data, so * write it out. */ len = writev(dev->fd, iov, num); /* Grr... Now we know how long the "struct lguest_dma" they * sent was, we make sure they didn't try to write over the end * of the block file (possibly extending it). 
*/ if (off + len > device_len) { /* Trim it back to the correct length */ ftruncate(dev->fd, device_len); /* Die, bad Guest, die. */ errx(1, "Write past end %llu+%u", off, len); } /* The reply length is 0: we just send back an empty DMA to * interrupt them and tell them the write is finished. */ *lenp = 0; } else { /* A read request. They sent an empty DMA to start the * request, and we put the read contents into the reply * buffer. */ len = readv(dev->fd, reply, reply_num); *lenp = len; } /* The result is 1 (done), 2 if there was an error (short read or * write). */ p->result = 1 + (p->bytes != len); /* Now tell them we've used their reply buffer. */ trigger_irq(fd, irq); /* We're supposed to return the number of bytes of the output buffer we * used. But the block device uses the "result" field instead, so we * don't bother. */ return 0; } /* This is the generic routine we call when the Guest sends some DMA out. */ static void handle_output(int fd, unsigned long dma, unsigned long key, struct device_list *devices) { struct device *i; u32 *lenp; struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; unsigned num = 0; /* Convert the "struct lguest_dma" they're sending to a "struct * iovec". */ lenp = dma2iov(dma, iov, &num); /* Check each device: if they expect output to this key, tell them to * handle it. */ for (i = devices->dev; i; i = i->next) { if (i->handle_output && key == i->watch_key) { /* We write the result straight into the used_len field * for them. */ *lenp = i->handle_output(fd, iov, num, i); return; } } /* This can happen: the kernel sends any SEND_DMA which doesn't match * another Guest to us. It could be that another Guest just left a * network, for example. But it's unusual. */ warnx("Pending dma %p, key %p", (void *)dma, (void *)key); } /* This is called when the waker wakes us up: check for incoming file * descriptors. */ static void handle_input(int fd, struct device_list *devices) { /* select() wants a zeroed timeval to mean "don't wait". 
*/ struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; for (;;) { struct device *i; fd_set fds = devices->infds; /* If nothing is ready, we're done. */ if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0) break; /* Otherwise, call the device(s) which have readable * file descriptors and a method of handling them. */ for (i = devices->dev; i; i = i->next) { if (i->handle_input && FD_ISSET(i->fd, &fds)) { /* If handle_input() returns false, it means we * should no longer service it. * handle_console_input() does this. */ if (!i->handle_input(fd, i)) { /* Clear it from the set of input file * descriptors kept at the head of the * device list. */ FD_CLR(i->fd, &devices->infds); /* Tell waker to ignore it too... */ write(waker_fd, &i->fd, sizeof(i->fd)); } } } } } /*L:190 * Device Setup * * All devices need a descriptor so the Guest knows it exists, and a "struct * device" so the Launcher can keep track of it. We have common helper * routines to allocate them. * * This routine allocates a new "struct lguest_device_desc" from descriptor * table in the devices array just above the Guest's normal memory. */ static struct lguest_device_desc * new_dev_desc(struct lguest_device_desc *descs, u16 type, u16 features, u16 num_pages) { unsigned int i; for (i = 0; i < LGUEST_MAX_DEVICES; i++) { if (!descs[i].type) { descs[i].type = type; descs[i].features = features; descs[i].num_pages = num_pages; /* If they said the device needs memory, we allocate * that now, bumping up the top of Guest memory. */ if (num_pages) { map_zeroed_pages(top, num_pages); descs[i].pfn = top/getpagesize(); top += num_pages*getpagesize(); } return &descs[i]; } } errx(1, "too many devices"); } /* This monster routine does all the creation and setup of a new device, * including caling new_dev_desc() to allocate the descriptor and device * memory. 
*/ static struct device *new_device(struct device_list *devices, u16 type, u16 num_pages, u16 features, int fd, bool (*handle_input)(int, struct device *), unsigned long watch_off, u32 (*handle_output)(int, const struct iovec *, unsigned, struct device *)) { struct device *dev = malloc(sizeof(*dev)); /* Append to device list. Prepending to a single-linked list is * easier, but the user expects the devices to be arranged on the bus * in command-line order. The first network device on the command line * is eth0, the first block device /dev/lgba, etc. */ *devices->lastdev = dev; dev->next = NULL; devices->lastdev = &dev->next; /* Now we populate the fields one at a time. */ dev->fd = fd; /* If we have an input handler for this file descriptor, then we add it * to the device_list's fdset and maxfd. */ if (handle_input) set_fd(dev->fd, devices); dev->desc = new_dev_desc(devices->descs, type, features, num_pages); dev->mem = (void *)(dev->desc->pfn * getpagesize()); dev->handle_input = handle_input; dev->watch_key = (unsigned long)dev->mem + watch_off; dev->handle_output = handle_output; return dev; } /* Our first setup routine is the console. It's a fairly simple device, but * UNIX tty handling makes it uglier than it could be. */ static void setup_console(struct device_list *devices) { struct device *dev; /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { struct termios term = orig_term; /* Then we turn off echo, line buffering and ^C etc. We want a * raw input stream to the Guest. */ term.c_lflag &= ~(ISIG|ICANON|ECHO); tcsetattr(STDIN_FILENO, TCSANOW, &term); /* If we exit gracefully, the original settings will be * restored so the user can see what they're typing. */ atexit(restore_term); } /* We don't currently require any memory for the console, so we ask for * 0 pages. 
*/ dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, STDIN_FILENO, handle_console_input, LGUEST_CONSOLE_DMA_KEY, handle_console_output); /* We store the console state in dev->priv, and initialize it. */ dev->priv = malloc(sizeof(struct console_abort)); ((struct console_abort *)dev->priv)->count = 0; verbose("device %p: console\n", (void *)(dev->desc->pfn * getpagesize())); } /* Setting up a block file is also fairly straightforward. */ static void setup_block_file(const char *filename, struct device_list *devices) { int fd; struct device *dev; off64_t *device_len; struct lguest_block_page *p; /* We open with O_LARGEFILE because otherwise we get stuck at 2G. We * open with O_DIRECT because otherwise our benchmarks go much too * fast. */ fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); /* We want one page, and have no input handler (the block file never * has anything interesting to say to us). Our timing will be quite * random, so it should be a reasonable randomness source. */ dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, LGUEST_DEVICE_F_RANDOMNESS, fd, NULL, 0, handle_block_output); /* We store the device size in the private area */ device_len = dev->priv = malloc(sizeof(*device_len)); /* This is the safe way of establishing the size of our device: it * might be a normal file or an actual block device like /dev/hdb. */ *device_len = lseek64(fd, 0, SEEK_END); /* The device memory is a "struct lguest_block_page". It's zeroed * already, we just need to put in the device size. Block devices * think in sectors (ie. 512 byte chunks), so we translate here. */ p = dev->mem; p->num_sectors = *device_len/512; verbose("device %p: block %i sectors\n", (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); } /* * Network Devices. * * Setting up network devices is quite a pain, because we have three types. * First, we have the inter-Guest network. This is a file which is mapped into * the address space of the Guests who are on the network. 
Because it is a * shared mapping, the same page underlies all the devices, and they can send * DMA to each other. * * Remember from our network driver, the Guest is told what slot in the page it * is to use. We use exclusive fnctl locks to reserve a slot. If another * Guest is using a slot, the lock will fail and we try another. Because fnctl * locks are cleaned up automatically when we die, this cleverly means that our * reservation on the slot will vanish if we crash. */ static unsigned int find_slot(int netfd, const char *filename) { struct flock fl; fl.l_type = F_WRLCK; fl.l_whence = SEEK_SET; fl.l_len = 1; /* Try a 1 byte lock in each possible position number */ for (fl.l_start = 0; fl.l_start < getpagesize()/sizeof(struct lguest_net); fl.l_start++) { /* If we succeed, return the slot number. */ if (fcntl(netfd, F_SETLK, &fl) == 0) return fl.l_start; } errx(1, "No free slots in network file %s", filename); } /* This function sets up the network file */ static void setup_net_file(const char *filename, struct device_list *devices) { int netfd; struct device *dev; /* We don't use open_or_die() here: for friendliness we create the file * if it doesn't already exist. */ netfd = open(filename, O_RDWR, 0); if (netfd < 0) { if (errno == ENOENT) { netfd = open(filename, O_RDWR|O_CREAT, 0600); if (netfd >= 0) { /* If we succeeded, initialize the file with a * blank page. */ char page[getpagesize()]; memset(page, 0, sizeof(page)); write(netfd, page, sizeof(page)); } } if (netfd < 0) err(1, "cannot open net file '%s'", filename); } /* We need 1 page, and the features indicate the slot to use and that * no checksum is needed. We never touch this device again; it's * between the Guests on the network, so we don't register input or * output handlers. */ dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, -1, NULL, 0, NULL); /* Map the shared file. 
*/ if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) err(1, "could not mmap '%s'", filename); verbose("device %p: shared net %s, peer %i\n", (void *)(dev->desc->pfn * getpagesize()), filename, dev->desc->features & ~LGUEST_NET_F_NOCSUM); } /*:*/ static u32 str2ip(const char *ipaddr) { unsigned int byte[4]; sscanf(ipaddr, "%u.%u.%u.%u", &byte[0], &byte[1], &byte[2], &byte[3]); return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; } /* This code is "adapted" from libbridge: it attaches the Host end of the * network device to the bridge device specified by the command line. * * This is yet another James Morris contribution (I'm an IP-level guy, so I * dislike bridging), and I just try not to break it. */ static void add_to_bridge(int fd, const char *if_name, const char *br_name) { int ifidx; struct ifreq ifr; if (!*br_name) errx(1, "must specify bridge name"); ifidx = if_nametoindex(if_name); if (!ifidx) errx(1, "interface %s does not exist!", if_name); strncpy(ifr.ifr_name, br_name, IFNAMSIZ); ifr.ifr_ifindex = ifidx; if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) err(1, "can't add %s to bridge %s", if_name, br_name); } /* This sets up the Host end of the network device with an IP address, brings * it up so packets will flow, the copies the MAC address into the hwaddr * pointer (in practice, the Host's slot in the network device's memory). */ static void configure_device(int fd, const char *devname, u32 ipaddr, unsigned char hwaddr[6]) { struct ifreq ifr; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; /* Don't read these incantations. Just cut & paste them like I did! 
*/ memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, devname); sin->sin_family = AF_INET; sin->sin_addr.s_addr = htonl(ipaddr); if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) err(1, "Setting %s interface address", devname); ifr.ifr_flags = IFF_UP; if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) err(1, "Bringing interface %s up", devname); /* SIOC stands for Socket I/O Control. G means Get (vs S for Set * above). IF means Interface, and HWADDR is hardware address. * Simple! */ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) err(1, "getting hw address for %s", devname); memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); } /*L:195 The other kind of network is a Host<->Guest network. This can either * use briding or routing, but the principle is the same: it uses the "tun" * device to inject packets into the Host as if they came in from a normal * network card. We just shunt packets between the Guest and the tun * device. */ static void setup_tun_net(const char *arg, struct device_list *devices) { struct device *dev; struct ifreq ifr; int netfd, ipfd; u32 ip; const char *br_name = NULL; /* We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell * the truth, I completely blundered my way through this code, but it * works now! */ netfd = open_or_die("/dev/net/tun", O_RDWR); memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strcpy(ifr.ifr_name, "tap%d"); if (ioctl(netfd, TUNSETIFF, &ifr) != 0) err(1, "configuring /dev/net/tun"); /* We don't need checksums calculated for packets coming in this * device: trust us! */ ioctl(netfd, TUNSETNOCSUM, 1); /* We create the net device with 1 page, using the features field of * the descriptor to tell the Guest it is in slot 1 (NET_PEERNUM), and * that the device has fairly random timing. We do *not* specify * LGUEST_NET_F_NOCSUM: these packets can reach the real world. 
* * We will put our MAC address is slot 0 for the Guest to see, so * it will send packets to us using the key "peer_offset(0)": */ dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, handle_tun_input, peer_offset(0), handle_tun_output); /* We keep a flag which says whether we've seen packets come out from * this network device. */ dev->priv = malloc(sizeof(bool)); *(bool *)dev->priv = false; /* We need a socket to perform the magic network ioctls to bring up the * tap interface, connect to the bridge etc. Any socket will do! */ ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); if (ipfd < 0) err(1, "opening IP socket"); /* If the command line was --tunnet=bridge:<name> do bridging. */ if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { ip = INADDR_ANY; br_name = arg + strlen(BRIDGE_PFX); add_to_bridge(ipfd, ifr.ifr_name, br_name); } else /* It is an IP address to set up the device with */ ip = str2ip(arg); /* We are peer 0, ie. first slot, so we hand dev->mem to this routine * to write the MAC address at the start of the device memory. */ configure_device(ipfd, ifr.ifr_name, ip, dev->mem); /* Set "promisc" bit: we want every single packet if we're going to * bridge to other machines (and otherwise it doesn't matter). */ *((u8 *)dev->mem) |= 0x1; close(ipfd); verbose("device %p: tun net %u.%u.%u.%u\n", (void *)(dev->desc->pfn * getpagesize()), (u8)(ip>>24), (u8)(ip>>16), (u8)(ip>>8), (u8)ip); if (br_name) verbose("attached to bridge: %s\n", br_name); } /* That's the end of device setup. */ /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves * its input and output, and finally, lays it to rest. */ static void __attribute__((noreturn)) run_guest(int lguest_fd, struct device_list *device_list) { for (;;) { u32 args[] = { LHREQ_BREAK, 0 }; unsigned long arr[2]; int readval; /* We read from the /dev/lguest device to run the Guest. 
*/ readval = read(lguest_fd, arr, sizeof(arr)); /* The read can only really return sizeof(arr) (the Guest did a * SEND_DMA to us), or an error. */ /* For a successful read, arr[0] is the address of the "struct * lguest_dma", and arr[1] is the key the Guest sent to. */ if (readval == sizeof(arr)) { handle_output(lguest_fd, arr[0], arr[1], device_list); continue; /* ENOENT means the Guest died. Reading tells us why. */ } else if (errno == ENOENT) { char reason[1024] = { 0 }; read(lguest_fd, reason, sizeof(reason)-1); errx(1, "%s", reason); /* EAGAIN means the waker wanted us to look at some input. * Anything else means a bug or incompatible change. */ } else if (errno != EAGAIN) err(1, "Running guest failed"); /* Service input, then unset the BREAK which releases * the Waker. */ handle_input(lguest_fd, device_list); if (write(lguest_fd, args, sizeof(args)) < 0) err(1, "Resetting break"); } } /* * This is the end of the Launcher. * * But wait! We've seen I/O from the Launcher, and we've seen I/O from the * Drivers. If we were to see the Host kernel I/O code, our understanding * would be complete... :*/ static struct option opts[] = { { "verbose", 0, NULL, 'v' }, { "sharenet", 1, NULL, 's' }, { "tunnet", 1, NULL, 't' }, { "block", 1, NULL, 'b' }, { "initrd", 1, NULL, 'i' }, { NULL }, }; static void usage(void) { errx(1, "Usage: lguest [--verbose] " "[--sharenet=<filename>|--tunnet=(<ipaddr>|bridge:<bridgename>)\n" "|--block=<filename>|--initrd=<filename>]...\n" "<mem-in-mb> vmlinux [args...]"); } /*L:100 The Launcher code itself takes us out into userspace, that scary place * where pointers run wild and free! Unfortunately, like most userspace * programs, it's quite boring (which is why everyone like to hack on the * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it * will get you through this section. Or, maybe not. * * The Launcher binary sits up high, usually starting at address 0xB8000000. 
* Everything below this is the "physical" memory for the Guest. For example, * if the Guest were to write a "1" at physical address 0, we would see a "1" * in the Launcher at "(int *)0". Guest physical == Launcher virtual. * * This can be tough to get your head around, but usually it just means that we * don't need to do any conversion when the Guest gives us it's "physical" * addresses. */ int main(int argc, char *argv[]) { /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size * of the (optional) initrd. */ unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; /* A temporary and the /dev/lguest file descriptor. */ int i, c, lguest_fd; /* The list of Guest devices, based on command line arguments. */ struct device_list device_list; /* The boot information for the Guest: at guest-physical address 0. */ void *boot = (void *)0; /* If they specify an initrd file to load. */ const char *initrd_name = NULL; /* First we initialize the device list. Since console and network * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the * list. We also keep a pointer to the last device, for easy appending * to the list. */ device_list.max_infd = -1; device_list.dev = NULL; device_list.lastdev = &device_list.dev; FD_ZERO(&device_list.infds); /* We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command * line. So we quickly look through the arguments to find the amount * of memory now. 
*/ for (i = 1; i < argc; i++) { if (argv[i][0] != '-') { mem = top = atoi(argv[i]) * 1024 * 1024; device_list.descs = map_zeroed_pages(top, 1); top += getpagesize(); break; } } /* The options are fairly straight-forward */ while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { switch (c) { case 'v': verbose = true; break; case 's': setup_net_file(optarg, &device_list); break; case 't': setup_tun_net(optarg, &device_list); break; case 'b': setup_block_file(optarg, &device_list); break; case 'i': initrd_name = optarg; break; default: warnx("Unknown argument %s", argv[optind]); usage(); } } /* After the other arguments we expect memory and kernel image name, * followed by command line arguments for the kernel. */ if (optind + 2 > argc) usage(); /* We always have a console device */ setup_console(&device_list); /* We start by mapping anonymous pages over all of guest-physical * memory range. This fills it with 0, and ensures that the Guest * won't be killed when it tries to access it. */ map_zeroed_pages(0, mem / getpagesize()); /* Now we load the kernel */ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), &page_offset); /* Map the initrd image if requested (at top of physical memory) */ if (initrd_name) { initrd_size = load_initrd(initrd_name, mem); /* These are the location in the Linux boot header where the * start and size of the initrd are expected to be found. */ *(unsigned long *)(boot+0x218) = mem - initrd_size; *(unsigned long *)(boot+0x21c) = initrd_size; /* The bootloader type 0xFF means "unknown"; that's OK. */ *(unsigned char *)(boot+0x210) = 0xFF; } /* Set up the initial linear pagetables, starting below the initrd. */ pgdir = setup_pagetables(mem, initrd_size, page_offset); /* The Linux boot header contains an "E820" memory map: ours is a * simple, single region. 
*/ *(char*)(boot+E820NR) = 1; *((struct e820entry *)(boot+E820MAP)) = ((struct e820entry) { 0, mem, E820_RAM }); /* The boot header contains a command line pointer: we put the command * line after the boot header (at address 4096) */ *(void **)(boot + 0x228) = boot + 4096; concat(boot + 4096, argv+optind+2); /* The guest type value of "1" tells the Guest it's under lguest. */ *(int *)(boot + 0x23c) = 1; /* We tell the kernel to initialize the Guest: this returns the open * /dev/lguest file descriptor. */ lguest_fd = tell_kernel(pgdir, start, page_offset); /* We fork off a child process, which wakes the Launcher whenever one * of the input file descriptors needs attention. Otherwise we would * run the Guest until it tries to output something. */ waker_fd = setup_waker(lguest_fd, &device_list); /* Finally, run the Guest. This doesn't return. */ run_guest(lguest_fd, &device_list); }