aboutsummaryrefslogblamecommitdiffstats
path: root/drivers/scsi/fdomain.c
blob: 56f4e6bffc213529ce0d9866223a7d958ab6e92b (plain) (tree)











































































































































































































































































                                                                               











                            
                     

                         








                             
              


                                                
      





























































































                                                                              
                 
                                 
      















                                                                     
                                                                       




                                                                    

              








                                                            

                                           
                                                               
                                    


                                                               

                    































                                                                           

              










































                                                                               
                                              
 

                    














                                                                          
 

















































                                                                                                                                                              
                                                






























































                                                                                 

              

































                                                                                      





















































































































                                                                           

 













                                                      





















                                                                                      
                                                                                       
                         

                               















                                                                        
                  















                                                                                                    
                




                                                                           
                     



                  


                     
 
 

















































                                                                                                                 
                
























                                                                              





                                                                                          
                



                                             
                                                                                         














                                                                                             
                   


                



                                   



























































































































                                                                              
                                                              
























































                                                                               
                                                                                   










                                                     
                                                     






























































































































                                                                                 
                                                                     































                                                                              
                                                                  











                                                                            

                                                  



                                    


                                                     

















































                                                                               

                                








                                                                       

                                                            
                                                                 

                                                                             
           
                                                   


                                                   
    
 









































                                                                                   

                                








































































                                                                              



                         





                                           

                            


































































































































































                                                                             

                                     





















                                                         
                 






                                                               
      

                                               
 
      
/* fdomain.c -- Future Domain TMC-16x0 SCSI driver
 * Created: Sun May  3 18:53:19 1992 by faith@cs.unc.edu
 * Revised: Mon Dec 28 21:59:02 1998 by faith@acm.org
 * Author: Rickard E. Faith, faith@cs.unc.edu
 * Copyright 1992-1996, 1998 Rickard E. Faith (faith@acm.org)
 * Shared IRQ support added 7/7/2001  Alan Cox <alan@redhat.com>

 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.

 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.

 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 675 Mass Ave, Cambridge, MA 02139, USA.

 **************************************************************************

 SUMMARY:

 Future Domain BIOS versions supported for autodetect:
    2.0, 3.0, 3.2, 3.4 (1.0), 3.5 (2.0), 3.6, 3.61
 Chips are supported:
    TMC-1800, TMC-18C50, TMC-18C30, TMC-36C70
 Boards supported:
    Future Domain TMC-1650, TMC-1660, TMC-1670, TMC-1680, TMC-1610M/MER/MEX
    Future Domain TMC-3260 (PCI)
    Quantum ISA-200S, ISA-250MG
    Adaptec AHA-2920A (PCI) [BUT *NOT* AHA-2920C -- use aic7xxx instead]
    IBM ?
 LILO/INSMOD command-line options:
    fdomain=<PORT_BASE>,<IRQ>[,<ADAPTER_ID>]


    
 NOTE:

 The Adaptec AHA-2920C has an Adaptec AIC-7850 chip on it.
 Use the aic7xxx driver for this board.
       
 The Adaptec AHA-2920A has a Future Domain chip on it, so this is the right
 driver for that card.  Unfortunately, the boxes will probably just say
 "2920", so you'll have to look on the card for a Future Domain logo, or a
 letter after the 2920.

 
 
 THANKS:

 Thanks to Adaptec for providing PCI boards for testing.  This finally
 enabled me to test the PCI detection and correct it for PCI boards that do
 not have a BIOS at a standard ISA location.  For PCI boards, LILO/INSMOD
 command-line options should no longer be needed.  --RF 18Nov98


 
 DESCRIPTION:
 
 This is the Linux low-level SCSI driver for Future Domain TMC-1660/1680
 TMC-1650/1670, and TMC-3260 SCSI host adapters.  The 1650 and 1670 have a
 25-pin external connector, whereas the 1660 and 1680 have a SCSI-2 50-pin
 high-density external connector.  The 1670 and 1680 have floppy disk
 controllers built in.  The TMC-3260 is a PCI bus card.

 Future Domain's older boards are based on the TMC-1800 chip, and this
 driver was originally written for a TMC-1680 board with the TMC-1800 chip.
 More recently, boards are being produced with the TMC-18C50 and TMC-18C30
 chips.  The latest and greatest board may not work with this driver.  If
 you have to patch this driver so that it will recognize your board's BIOS
 signature, then the driver may fail to function after the board is
 detected.

 Please note that the drive ordering that Future Domain implemented in BIOS
 versions 3.4 and 3.5 is the opposite of the order (currently) used by the
 rest of the SCSI industry.  If you have BIOS version 3.4 or 3.5, and have
 more than one drive, then the drive ordering will be the reverse of that
 which you see under DOS.  For example, under DOS SCSI ID 0 will be D: and
 SCSI ID 1 will be C: (the boot device).  Under Linux, SCSI ID 0 will be
 /dev/sda and SCSI ID 1 will be /dev/sdb.  The Linux ordering is consistent
 with that provided by all the other SCSI drivers for Linux.  If you want
 this changed, you will probably have to patch the higher level SCSI code.
 If you do so, please send me patches that are protected by #ifdefs.

 If you have a TMC-8xx or TMC-9xx board, then this is not the driver for
 your board.  Please refer to the Seagate driver for more information and
 possible support.

 
 
 HISTORY:

 Linux       Driver      Driver
 Version     Version     Date         Support/Notes

             0.0          3 May 1992  V2.0 BIOS; 1800 chip
 0.97        1.9         28 Jul 1992
 0.98.6      3.1         27 Nov 1992
 0.99        3.2          9 Dec 1992

 0.99.3      3.3         10 Jan 1993  V3.0 BIOS
 0.99.5      3.5         18 Feb 1993
 0.99.10     3.6         15 May 1993  V3.2 BIOS; 18C50 chip
 0.99.11     3.17         3 Jul 1993  (now under RCS)
 0.99.12     3.18        13 Aug 1993
 0.99.14     5.6         31 Oct 1993  (reselection code removed)

 0.99.15     5.9         23 Jan 1994  V3.4 BIOS (preliminary)
 1.0.8/1.1.1 5.15         1 Apr 1994  V3.4 BIOS; 18C30 chip (preliminary)
 1.0.9/1.1.3 5.16         7 Apr 1994  V3.4 BIOS; 18C30 chip
 1.1.38      5.18        30 Jul 1994  36C70 chip (PCI version of 18C30)
 1.1.62      5.20         2 Nov 1994  V3.5 BIOS
 1.1.73      5.22         7 Dec 1994  Quantum ISA-200S board; V2.0 BIOS

 1.1.82      5.26        14 Jan 1995  V3.5 BIOS; TMC-1610M/MER/MEX board
 1.2.10      5.28         5 Jun 1995  Quantum ISA-250MG board; V2.0, V2.01 BIOS
 1.3.4       5.31        23 Jun 1995  PCI BIOS-32 detection (preliminary)
 1.3.7       5.33         4 Jul 1995  PCI BIOS-32 detection
 1.3.28      5.36        17 Sep 1995  V3.61 BIOS; LILO command-line support
 1.3.34      5.39        12 Oct 1995  V3.60 BIOS; /proc
 1.3.72      5.39         8 Feb 1996  Adaptec AHA-2920 board
 1.3.85      5.41         4 Apr 1996
 2.0.12      5.44         8 Aug 1996  Use ID 7 for all PCI cards
 2.1.1       5.45         2 Oct 1996  Update ROM accesses for 2.1.x
 2.1.97      5.46	 23 Apr 1998  Rewritten PCI detection routines [mj]
 2.1.11x     5.47	  9 Aug 1998  Touched for 8 SCSI disk majors support
             5.48        18 Nov 1998  BIOS no longer needed for PCI detection
 2.2.0       5.50        28 Dec 1998  Support insmod parameters
 

 REFERENCES USED:

 "TMC-1800 SCSI Chip Specification (FDC-1800T)", Future Domain Corporation,
 1990.

 "Technical Reference Manual: 18C50 SCSI Host Adapter Chip", Future Domain
 Corporation, January 1992.

 "LXT SCSI Products: Specifications and OEM Technical Manual (Revision
 B/September 1991)", Maxtor Corporation, 1991.

 "7213S product Manual (Revision P3)", Maxtor Corporation, 1992.

 "Draft Proposed American National Standard: Small Computer System
 Interface - 2 (SCSI-2)", Global Engineering Documents. (X3T9.2/86-109,
 revision 10h, October 17, 1991)

 Private communications, Drew Eckhardt (drew@cs.colorado.edu) and Eric
 Youngdale (ericy@cais.com), 1992.

 Private communication, Tuong Le (Future Domain Engineering department),
 1994. (Disk geometry computations for Future Domain BIOS version 3.4, and
 TMC-18C30 detection.)

 Hogan, Thom. The Programmer's PC Sourcebook. Microsoft Press, 1988. Page
 60 (2.39: Disk Partition Table Layout).

 "18C30 Technical Reference Manual", Future Domain Corporation, 1993, page
 6-1.


 
 NOTES ON REFERENCES:

 The Maxtor manuals were free.  Maxtor telephone technical support is
 great!

 The Future Domain manuals were $25 and $35.  They document the chip, not
 the TMC-16x0 boards, so some information I had to guess at.  In 1992,
 Future Domain sold DOS BIOS source for $250 and the UN*X driver source was
 $750, but these required a non-disclosure agreement, so even if I could
 have afforded them, they would *not* have been useful for writing this
 publicly distributable driver.  Future Domain technical support has
 provided some information on the phone and have sent a few useful FAXs.
 They have been much more helpful since they started to recognize that the
 word "Linux" refers to an operating system :-).

 

 ALPHA TESTERS:

 There are many other alpha testers that come and go as the driver
 develops.  The people listed here were most helpful in times of greatest
 need (mostly early on -- I've probably left out a few worthy people in
 more recent times):

 Todd Carrico (todd@wutc.wustl.edu), Dan Poirier (poirier@cs.unc.edu ), Ken
 Corey (kenc@sol.acs.unt.edu), C. de Bruin (bruin@bruin@sterbbs.nl), Sakari
 Aaltonen (sakaria@vipunen.hit.fi), John Rice (rice@xanth.cs.odu.edu), Brad
 Yearwood (brad@optilink.com), and Ray Toy (toy@soho.crd.ge.com).

 Special thanks to Tien-Wan Yang (twyang@cs.uh.edu), who graciously lent me
 his 18C50-based card for debugging.  He is the sole reason that this
 driver works with the 18C50 chip.

 Thanks to Dave Newman (dnewman@crl.com) for providing initial patches for
 the version 3.4 BIOS.

 Thanks to James T. McKinley (mckinley@msupa.pa.msu.edu) for providing
 patches that support the TMC-3260, a PCI bus card with the 36C70 chip.
 The 36C70 chip appears to be "completely compatible" with the 18C30 chip.

 Thanks to Eric Kasten (tigger@petroglyph.cl.msu.edu) for providing the
 patch for the version 3.5 BIOS.

 Thanks to Stephen Henson (shenson@nyx10.cs.du.edu) for providing the
 patch for the Quantum ISA-200S SCSI adapter.
 
 Thanks to Adam Bowen for the signature to the 1610M/MER/MEX scsi cards, to
 Martin Andrews (andrewm@ccfadm.eeg.ccf.org) for the signature to some
 random TMC-1680 repackaged by IBM; and to Mintak Ng (mintak@panix.com) for
 the version 3.61 BIOS signature.

 Thanks to Mark Singer (elf@netcom.com) and Richard Simpson
 (rsimpson@ewrcsdra.demon.co.uk) for more Quantum signatures and detective
 work on the Quantum RAM layout.

 Special thanks to James T. McKinley (mckinley@msupa.pa.msu.edu) for
 providing patches for proper PCI BIOS32-mediated detection of the TMC-3260
 card (a PCI bus card with the 36C70 chip).  Please send James PCI-related
 bug reports.

 Thanks to Tom Cavin (tec@usa1.com) for preliminary command-line option
 patches.

 New PCI detection code written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>

 Insmod parameter code based on patches from Daniel Graham
 <graham@balance.uoregon.edu>. 
 
 All of the alpha testers deserve much thanks.



 NOTES ON USER DEFINABLE OPTIONS:

 DEBUG: This turns on the printing of various debug information.

 ENABLE_PARITY: This turns on SCSI parity checking.  With the current
 driver, all attached devices must support SCSI parity.  If none of your
 devices support parity, then you can probably get the driver to work by
 turning this option off.  I have no way of testing this, however, and it
 would appear that no one ever uses this option.

 FIFO_COUNT: The host adapter has an 8K cache (host adapters based on the
 18C30 chip have a 2k cache).  When this many 512 byte blocks are filled by
 the SCSI device, an interrupt will be raised.  Therefore, this could be as
 low as 0, or as high as 16.  Note, however, that values which are too high
 or too low seem to prevent any interrupts from occurring, and thereby lock
 up the machine.  I have found that 2 is a good number, but throughput may
 be increased by changing this value to values which are close to 2.
 Please let me know if you try any different values.

 RESELECTION: This is no longer an option, since I gave up trying to
 implement it in version 4.x of this driver.  It did not improve
 performance at all and made the driver unstable (because I never found one
 of the two race conditions which were introduced by the multiple
 outstanding command code).  The instability seems a very high price to pay
 just so that you don't have to wait for the tape to rewind.  If you want
 this feature implemented, send me patches.  I'll be happy to send a copy
 of my (broken) driver to anyone who would like to see a copy.

 **************************************************************************/

#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ioport.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/stat.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <scsi/scsicam.h>

#include <asm/system.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_ioctl.h>
#include "fdomain.h"

#ifndef PCMCIA
MODULE_AUTHOR("Rickard E. Faith");
MODULE_DESCRIPTION("Future domain SCSI driver");
MODULE_LICENSE("GPL");
#endif

  
/* Driver revision string (RCS keyword form). */
#define VERSION          "$Revision: 5.51 $"

/* START OF USER DEFINABLE OPTIONS */

#define DEBUG            0	/* Enable debugging output */
#define ENABLE_PARITY    1	/* Enable SCSI Parity */
#define FIFO_COUNT       2	/* Number of 512 byte blocks before INTR */

/* END OF USER DEFINABLE OPTIONS */

/* Fine-grained debug switches.  They are only ever non-zero when DEBUG is
   set; with DEBUG off every switch below is forced to 0. */
#if DEBUG
#define EVERY_ACCESS     0	/* Write a line on every scsi access */
#define ERRORS_ONLY      1	/* Only write a line if there is an error */
#define DEBUG_DETECT     0	/* Debug fdomain_16x0_detect() */
#define DEBUG_MESSAGES   1	/* Debug MESSAGE IN phase */
#define DEBUG_ABORT      1	/* Debug abort() routine */
#define DEBUG_RESET      1	/* Debug reset() routine */
#define DEBUG_RACE       1      /* Debug interrupt-driven race condition */
#else
#define EVERY_ACCESS     0	/* LEAVE THESE ALONE--CHANGE THE ONES ABOVE */
#define ERRORS_ONLY      0
#define DEBUG_DETECT     0
#define DEBUG_MESSAGES   0
#define DEBUG_ABORT      0
#define DEBUG_RESET      0
#define DEBUG_RACE       0
#endif

/* Errors are reported on the line, so we don't need to report them again */
#if EVERY_ACCESS
#undef ERRORS_ONLY
#define ERRORS_ONLY      0
#endif

/* PARITY_MASK is OR'ed into the value written to the TMC_Cntl register
   (see fdomain_make_bus_idle); it is 0 when parity checking is disabled. */
#if ENABLE_PARITY
#define PARITY_MASK      0x08
#else
#define PARITY_MASK      0x00
#endif

/* Controller chip variants the driver distinguishes.  The file-scope
   variable `chip' starts out as `unknown' and is set by
   fdomain_is_valid_port() once the ID registers have been read. */
enum chip_type {
   unknown  = 0,
   tmc1800  = 1,
   tmc18c50 = 2,
   tmc18c30 = 3,	/* print_banner() calls this TMC-36C70 when PCI_bus is set */
};

/* Single-bit flag values (bits 1 through 6).  The code that stores and
   tests them is outside this part of the file; the names suggest they
   track the state of the current command -- confirm against the users. */
enum {
   in_arbitration = 1 << 1,
   in_selection   = 1 << 2,
   in_other       = 1 << 3,
   disconnect     = 1 << 4,
   aborted        = 1 << 5,
   sent_ident     = 1 << 6,
};

/* Offsets of the readable registers.  Each value is added to the
   adapter's I/O base before being passed to inb(), e.g.
   inb(port_base + Read_Loopback).  Offset 13 is not used here. */
enum in_port_type {
   Read_SCSI_Data   = 0x00,
   SCSI_Status      = 0x01,
   TMC_Status       = 0x02,
   FIFO_Status      = 0x03,	/* tmc18c50/tmc18c30 only */
   Interrupt_Cond   = 0x04,	/* tmc18c50/tmc18c30 only */
   LSB_ID_Code      = 0x05,
   MSB_ID_Code      = 0x06,
   Read_Loopback    = 0x07,
   SCSI_Data_NoACK  = 0x08,
   Interrupt_Status = 0x09,
   Configuration1   = 0x0a,
   Configuration2   = 0x0b,	/* tmc18c50/tmc18c30 only */
   Read_FIFO        = 0x0c,
   FIFO_Data_Count  = 0x0e,
};

/* Offsets of the writable registers.  Each value is added to the
   adapter's I/O base before being passed to outb(), e.g.
   outb(0, port_base + SCSI_Cntl).  Several offsets coincide with a
   differently named register in enum in_port_type. */
enum out_port_type {
   Write_SCSI_Data = 0x00,
   SCSI_Cntl       = 0x01,
   Interrupt_Cntl  = 0x02,
   SCSI_Mode_Cntl  = 0x03,
   TMC_Cntl        = 0x04,
   Memory_Cntl     = 0x05,	/* tmc18c50/tmc18c30 only */
   Write_Loopback  = 0x07,
   IO_Control      = 0x0b,	/* tmc18c30 only */
   Write_FIFO      = 0x0c,
};

/* Driver-wide state.  The driver keeps a single set of these, so only
   one adapter is described at a time.

   .bss will zero all the static variables below (except the ones that
   carry an explicit initializer: chip and FIFO_Size). */
static int               port_base;	/* I/O base; every register offset is added to this */
static unsigned long     bios_base;	/* Address of the BIOS ROM whose signature matched */
static void __iomem *    bios_mem;	/* ioremap()ed view of that ROM (fdomain_isa_detect) */
static int               bios_major;	/* From the signature table; -1 after fdomain_setup() */
static int               bios_minor;	/* From the signature table; -1 after fdomain_setup() */
static int               PCI_bus;	/* Non-zero when the matched signature has flag == 1 */
#ifdef CONFIG_PCI
static struct pci_dev	*PCI_dev;
#endif
static int               Quantum;	/* Quantum board variant */
static int               interrupt_level;	/* IRQ number; 0 is printed as "<none>" */
static volatile int      in_command;
static struct scsi_cmnd  *current_SC;
static enum chip_type    chip              = unknown;	/* Set by fdomain_is_valid_port() */
static int               adapter_mask;
static int               this_id;	/* Adapter SCSI id; fdomain_setup() takes it from the 3rd option */
static int               setup_called;	/* Incremented by fdomain_setup() */

#if DEBUG_RACE
static volatile int      in_interrupt_flag;
#endif

static int               FIFO_Size = 0x2000; /* 8k FIFO for
						pre-tmc18c30 chips */

static irqreturn_t       do_fdomain_16x0_intr( int irq, void *dev_id );
/* Allow insmod parameters to be like LILO parameters.  For example:
   insmod fdomain fdomain=0x140,11
   NOTE(review): presumably this string is handed to fdomain_setup() by
   the detect code, which is outside this part of the file -- confirm. */
static char * fdomain = NULL;
module_param(fdomain, charp, 0);

#ifndef PCMCIA

/* Physical addresses at which fdomain_isa_detect() looks for a BIOS ROM
   signature, tried in this order.  fdomain_get_irq() indexes this table
   with bits 7:6 of the Configuration1 register, so the order of the
   first four entries must not change. */
static unsigned long addresses[] = {
   0xc8000,
   0xca000,
   0xce000,
   0xde000,
   0xcc000,		/* Extra addresses for PCI boards */
   0xd0000,
   0xe0000,
};
#define ADDRESS_COUNT ARRAY_SIZE(addresses)

/* I/O port bases accepted by fdomain_isa_detect() when it reads a base
   address out of the BIOS RAM area. */
static unsigned short ports[] = { 0x140, 0x150, 0x160, 0x170 };
#define PORT_COUNT ARRAY_SIZE(ports)

/* IRQ lookup table: fdomain_get_irq() indexes it with bits 3:1 of the
   Configuration1 register (eight possible values, hence eight entries). */
static unsigned short ints[] = { 3, 5, 10, 11, 12, 14, 15, 0 };

#endif /* !PCMCIA */

/*

  READ THIS BEFORE YOU ADD A SIGNATURE!

  READING THIS SHORT NOTE CAN SAVE YOU LOTS OF TIME!

  READ EVERY WORD, ESPECIALLY THE WORD *NOT*

  This driver works *ONLY* for Future Domain cards using the TMC-1800,
  TMC-18C50, or TMC-18C30 chip.  This includes models TMC-1650, 1660, 1670,
  and 1680.  These are all 16-bit cards.

  The following BIOS signatures are for boards which do *NOT*
  work with this driver (these TMC-8xx and TMC-9xx boards may work with the
  Seagate driver):

  FUTURE DOMAIN CORP. (C) 1986-1988 V4.0I 03/16/88
  FUTURE DOMAIN CORP. (C) 1986-1989 V5.0C2/14/89
  FUTURE DOMAIN CORP. (C) 1986-1989 V6.0A7/28/89
  FUTURE DOMAIN CORP. (C) 1986-1990 V6.0105/31/90
  FUTURE DOMAIN CORP. (C) 1986-1990 V6.0209/18/90
  FUTURE DOMAIN CORP. (C) 1986-1990 V7.009/18/90
  FUTURE DOMAIN CORP. (C) 1992 V8.00.004/02/92

  (The cards which do *NOT* work are all 8-bit cards -- although some of
  them have a 16-bit form-factor, the upper 8-bits are used only for IRQs
  and are *NOT* used for data.  You can tell the difference by following
  the tracings on the circuit board -- if only the IRQ lines are involved,
  you have a "8-bit" card, and should *NOT* use this driver.)

*/

#ifndef PCMCIA

/* Table of known BIOS ROM signatures.  fdomain_isa_detect() walks it in
   order for every candidate ROM address and stops at the first entry
   that matches, so more specific signatures must come before the
   generic one at the end. */
static struct signature {
   const char *signature;	/* Text expected in the ROM */
   int  sig_offset;		/* Byte offset of the text from the ROM base */
   int  sig_length;		/* Number of bytes to compare */
   int  major_bios_version;	/* Copied to bios_major on a match (-1 == unknown) */
   int  minor_bios_version;	/* Copied to bios_minor on a match (-1 == unknown) */
   int  flag; /* 1 == PCI_bus, 2 == ISA_200S, 3 == ISA_250MG, 4 == ISA_200S */
} signatures[] = {
   /*          1         2         3         4         5         6 */
   /* 123456789012345678901234567890123456789012345678901234567890 */
   { "FUTURE DOMAIN CORP. (C) 1986-1990 1800-V2.07/28/89",  5, 50,  2,  0, 0 },
   { "FUTURE DOMAIN CORP. (C) 1986-1990 1800-V1.07/28/89",  5, 50,  2,  0, 0 },
   { "FUTURE DOMAIN CORP. (C) 1986-1990 1800-V2.07/28/89", 72, 50,  2,  0, 2 },
   { "FUTURE DOMAIN CORP. (C) 1986-1990 1800-V2.0",        73, 43,  2,  0, 3 },
   { "FUTURE DOMAIN CORP. (C) 1991 1800-V2.0.",            72, 39,  2,  0, 4 },
   { "FUTURE DOMAIN CORP. (C) 1992 V3.00.004/02/92",        5, 44,  3,  0, 0 },
   { "FUTURE DOMAIN TMC-18XX (C) 1993 V3.203/12/93",        5, 44,  3,  2, 0 },
   { "IBM F1 P2 BIOS v1.0104/29/93",                        5, 28,  3, -1, 0 },
   { "Future Domain Corp. V1.0008/18/93",                   5, 33,  3,  4, 0 },
   { "Future Domain Corp. V1.0008/18/93",                  26, 33,  3,  4, 1 },
   { "Adaptec AHA-2920 PCI-SCSI Card",                     42, 31,  3, -1, 1 },
				/* NOTE(review): the Adaptec string above is
				   30 characters long but sig_length is 31,
				   so the comparison presumably covers the
				   terminating NUL as well -- confirm that
				   this is intended before touching it. */
   { "IBM F1 P264/32",                                      5, 14,  3, -1, 1 },
				/* This next signature may not be a 3.5 bios */
   { "Future Domain Corp. V2.0108/18/93",                   5, 33,  3,  5, 0 },
   { "FUTURE DOMAIN CORP.  V3.5008/18/93",                  5, 34,  3,  5, 0 },
   { "FUTURE DOMAIN 18c30/18c50/1800 (C) 1994 V3.5",        5, 44,  3,  5, 0 },
   { "FUTURE DOMAIN CORP.  V3.6008/18/93",                  5, 34,  3,  6, 0 },
   { "FUTURE DOMAIN CORP.  V3.6108/18/93",                  5, 34,  3,  6, 0 },
   { "FUTURE DOMAIN TMC-18XX",                              5, 22, -1, -1, 0 },

   /* READ NOTICE ABOVE *BEFORE* YOU WASTE YOUR TIME ADDING A SIGNATURE
    Also, fix the disk geometry code for your signature and send your
    changes to faith@cs.unc.edu.  Above all, do *NOT* change any old
    signatures!

    Note that the last line will match a "generic" 18XX bios.  Because
    Future Domain has changed the host SCSI ID and/or the location of the
    geometry information in the on-board RAM area for each of the first
    three BIOS's, it is still important to enter a fully qualified
    signature in the table for any new BIOS's (after the host SCSI ID and
    geometry location are verified). */
};

#define SIGNATURE_COUNT ARRAY_SIZE(signatures)

#endif /* !PCMCIA */

static void print_banner( struct Scsi_Host *shpnt )
{
   if (!shpnt) return;		/* This won't ever happen */

   if (bios_major < 0 && bios_minor < 0) {
      printk(KERN_INFO "scsi%d: <fdomain> No BIOS; using scsi id %d\n",
	      shpnt->host_no, shpnt->this_id);
   } else {
      printk(KERN_INFO "scsi%d: <fdomain> BIOS version ", shpnt->host_no);

      if (bios_major >= 0) printk("%d.", bios_major);
      else                 printk("?.");

      if (bios_minor >= 0) printk("%d", bios_minor);
      else                 printk("?.");

      printk( " at 0x%lx using scsi id %d\n",
	      bios_base, shpnt->this_id );
   }

				/* If this driver works for later FD PCI
				   boards, we will have to modify banner
				   for additional PCI cards, but for now if
				   it's PCI it's a TMC-3260 - JTM */
   printk(KERN_INFO "scsi%d: <fdomain> %s chip at 0x%x irq ",
	   shpnt->host_no,
	   chip == tmc1800 ? "TMC-1800" : (chip == tmc18c50 ? "TMC-18C50" : (chip == tmc18c30 ? (PCI_bus ? "TMC-36C70 (PCI bus)" : "TMC-18C30") : "Unknown")),
	   port_base);

   if (interrupt_level)
   	printk("%d", interrupt_level);
   else
        printk("<none>");

   printk( "\n" );
}

/*
 * Parse an "fdomain=<PORT_BASE>,<IRQ>[,<ADAPTER_ID>]" option string.
 *
 * Only the first call is honoured, and it must supply two or three
 * numbers; anything else prints the usage text and returns 0 with
 * port_base, interrupt_level and this_id left untouched.  On success
 * those three are filled in (this_id is 0 when no adapter id was
 * given), the BIOS version is marked unknown, and 1 is returned.
 *
 * setup_called is incremented on every call and once more on success.
 */
int fdomain_setup(char *str)
{
	int opts[4];	/* [0] is used as the count of values that follow */
	int count;

	(void)get_options(str, ARRAY_SIZE(opts), opts);
	count = opts[0];

	if (setup_called++ || count < 2 || count > 3) {
		printk(KERN_INFO "scsi: <fdomain> Usage: fdomain=<PORT_BASE>,<IRQ>[,<ADAPTER_ID>]\n");
		printk(KERN_ERR "scsi: <fdomain> Bad LILO/INSMOD parameters?\n");
		return 0;
	}

	/* count is 2 or 3 from here on, so the first two values are present */
	port_base       = opts[1];
	interrupt_level = opts[2];
	this_id         = (count == 3) ? opts[3] : 0;

	/* Use geometry for BIOS version >= 3.4 */
	bios_major = -1;
	bios_minor = -1;

	++setup_called;
	return 1;
}

__setup("fdomain=", fdomain_setup);


/* Delay for `amount' units of 10 milliseconds each, using mdelay(). */
static void do_pause(unsigned amount)
{
	unsigned msecs = 10 * amount;

	mdelay(msecs);
}

static inline void fdomain_make_bus_idle( void )
{
   outb(0, port_base + SCSI_Cntl);
   outb(0, port_base + SCSI_Mode_Cntl);
   if (chip == tmc18c50 || chip == tmc18c30)
	 outb(0x21 | PARITY_MASK, port_base + TMC_Cntl); /* Clear forced intr. */
   else
	 outb(0x01 | PARITY_MASK, port_base + TMC_Cntl);
}

/* Check whether a supported Future Domain chip answers at I/O base
   `port' by reading its two ID registers.

   Returns 1 when the ID matches and 0 otherwise.  On a match the
   file-scope `chip' is set to tmc1800, tmc18c50 or tmc18c30; for the
   18c30 FIFO_Size is also reduced to 2k.  `chip' is left untouched on
   failure.  Note that the 18c30 probe writes to the IO_Control
   register. */
static int fdomain_is_valid_port( int port )
{
#if DEBUG_DETECT 
   printk( " (%x%x),",
	   inb( port + MSB_ID_Code ), inb( port + LSB_ID_Code ) );
#endif

   /* The MCA ID is a unique id for each MCA compatible board.  We
      are using ISA boards, but Future Domain provides the MCA ID
      anyway.  We can use this ID to ensure that this is a Future
      Domain TMC-1660/TMC-1680.
    */

   if (inb( port + LSB_ID_Code ) != 0xe9) { /* test for 0x6127 id */
      if (inb( port + LSB_ID_Code ) != 0x27) return 0;
      if (inb( port + MSB_ID_Code ) != 0x61) return 0;
      chip = tmc1800;
   } else {				    /* test for 0x60e9 id */
      if (inb( port + MSB_ID_Code ) != 0x60) return 0;
      chip = tmc18c50;

				/* Try to toggle 32-bit mode.  This only
				   works on an 18c30 chip.  (User reports
				   say this works, so we should switch to
				   it in the near future.) */

				/* Set bit 7 of IO_Control and check that it
				   reads back in Configuration2, then clear
				   it and check again; only a chip that
				   follows both writes is taken to be an
				   18c30. */
      outb( 0x80, port + IO_Control );
      if ((inb( port + Configuration2 ) & 0x80) == 0x80) {
	 outb( 0x00, port + IO_Control );
	 if ((inb( port + Configuration2 ) & 0x80) == 0x00) {
	    chip = tmc18c30;
	    FIFO_Size = 0x800;	/* 2k FIFO */
	 }
      }
				/* If that failed, we are an 18c50. */
   }

   return 1;
}

/* Exercise the loopback register pair at port_base: write each value
   from 0 to 254 to Write_Loopback and read it back from Read_Loopback.
   Returns 0 if every value reads back unchanged, 1 at the first
   mismatch.
   NOTE(review): 255 itself is never written -- confirm whether the
   bound is intentional. */
static int fdomain_test_loopback( void )
{
   int value = 0;

   while (value < 255) {
      outb( value, port_base + Write_Loopback );
      if (inb( port_base + Read_Loopback ) != value)
	 return 1;
      value++;
   }

   return 0;
}

#ifndef PCMCIA

/* fdomain_get_irq assumes that we have a valid MCA ID for a
   TMC-1660/TMC-1680 Future Domain board.  Now, check to be sure the
   bios_base matches these ports.  If someone was unlucky enough to have
   purchased more than one Future Domain board, then they will have to
   modify this code, as we only detect one board here.  [The one with the
   lowest bios_base.]

   Note that this routine is only used for systems without a PCI BIOS32
   (e.g., ISA bus).  For PCI bus systems, this routine will likely fail
   unless one of the IRQs listed in the ints array is used by the board.
   Sometimes it is possible to use the computer's BIOS setup screen to
   configure a PCI system so that one of these IRQs will be used by the
   Future Domain card. */

static int fdomain_get_irq( int base )
{
   int options = inb(base + Configuration1);

#if DEBUG_DETECT
   printk("scsi: <fdomain> Options = %x\n", options);
#endif
 
   /* Check for board with lowest bios_base --
      this isn't valid for the 18c30 or for
      boards on the PCI bus, so just assume we
      have the right board. */

   if (chip != tmc18c30 && !PCI_bus && addresses[(options & 0xc0) >> 6 ] != bios_base)
   	return 0;
   return ints[(options & 0x0e) >> 1];
}

/* Probe for an ISA board.
   Step 1: map each candidate BIOS address and look for a known signature;
	   on a match record the BIOS version, bus/Quantum flags, bios_base
	   and the mapping (bios_mem, which stays mapped).
   Step 2: for a version 2 BIOS, read the port base out of the BIOS RAM
	   area and accept it if it is one of the known ports and the chip
	   answers there.
   Step 3: otherwise try every entry of ports[] in turn.
   On success returns 1 with *irq and *iobase filled in and the 16-port
   I/O region still claimed; returns 0 when no port validates. */
static int fdomain_isa_detect( int *irq, int *iobase )
{
   int i, j;
   int base = 0xdeadbeef;	/* placeholder until a real port is chosen */
   int flag = 0;

#if DEBUG_DETECT
   printk( "scsi: <fdomain> fdomain_isa_detect:" );
#endif

   for (i = 0; i < ADDRESS_COUNT; i++) {
      void __iomem *p = ioremap(addresses[i], 0x2000);
      if (!p)
	continue;
#if DEBUG_DETECT
      printk( " %lx(%lx),", addresses[i], bios_base );
#endif
      for (j = 0; j < SIGNATURE_COUNT; j++) {
	 if (check_signature(p + signatures[j].sig_offset,
			     signatures[j].signature,
			     signatures[j].sig_length )) {
	    bios_major = signatures[j].major_bios_version;
	    bios_minor = signatures[j].minor_bios_version;
	    PCI_bus    = (signatures[j].flag == 1);
	    Quantum    = (signatures[j].flag > 1) ? signatures[j].flag : 0;
	    bios_base  = addresses[i];
	    bios_mem   = p;	/* mapping is kept for later BIOS RAM reads */
	    goto found;
	 }
      }
      iounmap(p);		/* no signature at this address */
   }
 
   /* Reached both after a signature match and when no address matched. */
found:
   if (bios_major == 2) {
      /* The TMC-1660/TMC-1680 has a RAM area just after the BIOS ROM.
	 Assuming the ROM is enabled (otherwise we wouldn't have been
	 able to read the ROM signature :-), then the ROM sets up the
	 RAM area with some magic numbers, such as a list of port
	 base addresses and a list of the disk "geometry" reported to
	 DOS (this geometry has nothing to do with physical geometry).
       */

      /* The 16-bit port base is stored low byte first; its location
	 depends on the BIOS variant. */
      switch (Quantum) {
      case 2:			/* ISA_200S */
      case 3:			/* ISA_250MG */
	 base = readb(bios_mem + 0x1fa2) + (readb(bios_mem + 0x1fa3) << 8);
	 break;
      case 4:			/* ISA_200S (another one) */
	 base = readb(bios_mem + 0x1fa3) + (readb(bios_mem + 0x1fa4) << 8);
	 break;
      default:
	 base = readb(bios_mem + 0x1fcc) + (readb(bios_mem + 0x1fcd) << 8);
	 break;
      }
   
#if DEBUG_DETECT
      printk( " %x,", base );
#endif

      /* Only trust the RAM value if it is one of the known ports. */
      for (i = 0; i < PORT_COUNT; i++) {
	if (base == ports[i]) {
		if (!request_region(base, 0x10, "fdomain"))
			break;
		if (!fdomain_is_valid_port(base)) {
			release_region(base, 0x10);
			break;
		}
		/* Success: the region stays claimed for the caller. */
		*irq    = fdomain_get_irq( base );
		*iobase = base;
		return 1;
	}
      }

      /* This is a bad sign.  It usually means that someone patched the
	 BIOS signature list (the signatures variable) to contain a BIOS
	 signature for a board *OTHER THAN* the TMC-1660/TMC-1680. */
      
#if DEBUG_DETECT
      printk( " RAM FAILED, " );
#endif
   }

   /* Anyway, the alternative to finding the address in the RAM is to just
      search through every possible port address for one that is attached
      to the Future Domain card.  Don't panic, though, about reading all
      these random port addresses -- there are rumors that the Future
      Domain BIOS does something very similar.

      Do not, however, check ports which the kernel knows are being used by
      another driver. */

   for (i = 0; i < PORT_COUNT; i++) {
      base = ports[i];
      if (!request_region(base, 0x10, "fdomain")) {
#if DEBUG_DETECT
	 printk( " (%x inuse),", base );
#endif
	 continue;
      }
#if DEBUG_DETECT
      printk( " %x,", base );
#endif
      flag = fdomain_is_valid_port(base);
      if (flag)
	break;			/* keep the region claimed */
      release_region(base, 0x10);
   }

#if DEBUG_DETECT
   if (flag) printk( " SUCCESS\n" );
   else      printk( " FAILURE\n" );
#endif

   if (!flag) return 0;		/* iobase not found */

   *irq    = fdomain_get_irq( base );
   *iobase = base;

   return 1;			/* success */
}

#else /* PCMCIA */

static int fdomain_isa_detect( int *irq, int *iobase )
{
	/* PCMCIA build: no ISA probing is done.  Zero whichever outputs
	   were supplied and report that nothing was found. */
	if (iobase != NULL)
		*iobase = 0;
	if (irq != NULL)
		*irq = 0;

	return 0;
}

#endif /* !PCMCIA */


/* PCI detection function:
   int fdomain_pci_bios_detect(int *irq, int *iobase, struct pci_dev **ret_pdev)
   This function gets the Interrupt Level and I/O base address from
   the PCI configuration registers. */

#ifdef CONFIG_PCI
/* Look for a Future Domain 36C70 PCI function, enable it, claim the 16
   I/O ports at BAR 0 and check that a supported chip answers there.

   Returns 1 on success with *irq, *iobase and *ret_pdev filled in; the
   reference taken by pci_get_device() is then owned by the caller (and
   by the PCI_dev global).  Returns 0 on failure, after dropping that
   reference and releasing the I/O region if it was claimed.

   *ret_pdev is written only on success: the failure path drops the device
   reference, so the caller must not be left holding a pointer that it
   would put a second time. */
static int fdomain_pci_bios_detect( int *irq, int *iobase, struct pci_dev **ret_pdev )
{
   unsigned int     pci_irq;                /* PCI interrupt line */
   unsigned long    pci_base;               /* PCI I/O base address */
   struct pci_dev   *pdev = NULL;

#if DEBUG_DETECT
   /* Tell how to print a list of the known PCI devices from bios32 and
      list vendor and device IDs being used if in debug mode.  */

   printk( "scsi: <fdomain> INFO: use lspci -v to see list of PCI devices\n" );
   printk( "scsi: <fdomain> TMC-3260 detect:"
	   " Using Vendor ID: 0x%x and Device ID: 0x%x\n",
	   PCI_VENDOR_ID_FD,
	   PCI_DEVICE_ID_FD_36C70 );
#endif

   if ((pdev = pci_get_device(PCI_VENDOR_ID_FD, PCI_DEVICE_ID_FD_36C70, pdev)) == NULL)
      return 0;
   if (pci_enable_device(pdev))
      goto fail;

#if DEBUG_DETECT
   printk( "scsi: <fdomain> TMC-3260 detect:"
	   " PCI bus %u, device %u, function %u\n",
	   pdev->bus->number,
	   PCI_SLOT(pdev->devfn),
	   PCI_FUNC(pdev->devfn));
#endif

   /* We now have the appropriate device function for the FD board so we
      just read the PCI config info from the registers.  */

   pci_base = pci_resource_start(pdev, 0);
   pci_irq = pdev->irq;

   if (!request_region( pci_base, 0x10, "fdomain" ))
      goto fail;

   /* Now we have the I/O base address and interrupt from the PCI
      configuration registers. */

   *irq    = pci_irq;
   *iobase = pci_base;

#if DEBUG_DETECT
   printk( "scsi: <fdomain> TMC-3260 detect:"
	   " IRQ = %d, I/O base = 0x%x [0x%lx]\n", *irq, *iobase, pci_base );
#endif

   if (!fdomain_is_valid_port(pci_base)) {
      printk(KERN_ERR "scsi: <fdomain> PCI card detected, but driver not loaded (invalid port)\n" );
      release_region(pci_base, 0x10);
      goto fail;
   }

   /* Only now, with nothing left that can fail, hand the device out. */
   *ret_pdev = pdev;

				/* Fill in a few global variables.  Ugh. */
   bios_major = bios_minor = -1;
   PCI_bus    = 1;
   PCI_dev    = pdev;
   Quantum    = 0;
   bios_base  = 0;

   return 1;
fail:
   pci_dev_put(pdev);
   return 0;
}

#endif

struct Scsi_Host *__fdomain_16x0_detect(struct scsi_host_template *tpnt )
{
   int              retcode;
   struct Scsi_Host *shpnt;
   struct pci_dev *pdev = NULL;

   if (setup_called) {
#if DEBUG_DETECT
      printk( "scsi: <fdomain> No BIOS, using port_base = 0x%x, irq = %d\n",
	      port_base, interrupt_level );
#endif
      if (!request_region(port_base, 0x10, "fdomain")) {
	 printk( "scsi: <fdomain> port 0x%x is busy\n", port_base );
	 printk( "scsi: <fdomain> Bad LILO/INSMOD parameters?\n" );
	 return NULL;
      }
      if (!fdomain_is_valid_port( port_base )) {
	 printk( "scsi: <fdomain> Cannot locate chip at port base 0x%x\n",
		 port_base );
	 printk( "scsi: <fdomain> Bad LILO/INSMOD parameters?\n" );
	 release_region(port_base, 0x10);
	 return NULL;
      }
   } else {
      int flag = 0;

#ifdef CONFIG_PCI
				/* Try PCI detection first */
      flag = fdomain_pci_bios_detect( &interrupt_level, &port_base, &pdev );
#endif
      if (!flag) {
				/* Then try ISA bus detection */
	 flag = fdomain_isa_detect( &interrupt_level, &port_base );

	 if (!flag) {
	    printk( "scsi: <fdomain> Detection failed (no card)\n" );
	    return NULL;
	 }
      }
   }

   fdomain_16x0_bus_reset(NULL);

   if (fdomain_test_loopback()) {
      printk(KERN_ERR  "scsi: <fdomain> Detection failed (loopback test failed at port base 0x%x)\n", port_base);
      if (setup_called) {
	 printk(KERN_ERR "scsi: <fdomain> Bad LILO/INSMOD parameters?\n");
      }
      goto fail;
   }

   if (this_id) {
      tpnt->this_id = (this_id & 0x07);
      adapter_mask  = (1 << tpnt->this_id);
   } else {
      if (PCI_bus || (bios_major == 3 && bios_minor >= 2) || bios_major < 0) {
	 tpnt->this_id = 7;
	 adapter_mask  = 0x80;
      } else {
	 tpnt->this_id = 6;
	 adapter_mask  = 0x40;
      }
   }

/* Print out a banner here in case we can't
   get resources.  */

   shpnt = scsi_register( tpnt, 0 );
   if(shpnt == NULL) {
	release_region(port_base, 0x10);
   	return NULL;
   }
   shpnt->irq = interrupt_level;
   shpnt->io_port = port_base;
   shpnt->n_io_port = 0x10;
   print_banner( shpnt );

   /* Log IRQ with kernel */   
   if (!interrupt_level) {
      printk(KERN_ERR "scsi: <fdomain> Card Detected, but driver not loaded (no IRQ)\n" );
      goto fail;
   } else {
      /* Register the IRQ with the kernel */

      retcode = request_irq( interrupt_level,
			     do_fdomain_16x0_intr, pdev?IRQF_SHARED:0, "fdomain", shpnt);

      if (retcode < 0) {
	 if (retcode == -EINVAL) {
	    printk(KERN_ERR "scsi: <fdomain> IRQ %d is bad!\n", interrupt_level );
	    printk(KERN_ERR "                This shouldn't happen!\n" );
	    printk(KERN_ERR "                Send mail to faith@acm.org\n" );
	 } else if (retcode == -EBUSY) {
	    printk(KERN_ERR "scsi: <fdomain> IRQ %d is already in use!\n", interrupt_level );
	    printk(KERN_ERR "                Please use another IRQ!\n" );
	 } else {
	    printk(KERN_ERR "scsi: <fdomain> Error getting IRQ %d\n", interrupt_level );
	    printk(KERN_ERR "                This shouldn't happen!\n" );
	    printk(KERN_ERR "                Send mail to faith@acm.org\n" );
	 }
	 printk(KERN_ERR "scsi: <fdomain> Detected, but driver not loaded (IRQ)\n" );
	 goto fail;
      }
   }
   return shpnt;
fail:
   pci_dev_put(pdev);
   release_region(port_base, 0x10);
   return NULL;
}

/* Template .detect entry point: returns 1 when a host was set up, else 0. */
static int fdomain_16x0_detect(struct scsi_host_template *tpnt)
{
	struct Scsi_Host *host;

	/* Hand the file-level `fdomain` option to fdomain_setup() when it
	   is set, before probing. */
	if (fdomain)
		fdomain_setup(fdomain);

	host = __fdomain_16x0_detect(tpnt);
	return host != NULL;
}

/* Build the driver identification string in a static buffer and return it.
   The host argument is not used.

   When VERSION contains a ':', it is treated as an RCS revision string
   ("$Revision: x.y $"): the text after the ':' is appended and the
   trailing " $" is cut off.  If the '$' has been stripped, only a
   trailing blank (if any) is removed.  Otherwise VERSION is appended
   after a single space. */
static const char *fdomain_16x0_info( struct Scsi_Host *ignore )
{
   static char buffer[128];
   char        *pt;

   strcpy( buffer, "Future Domain 16-bit SCSI Driver Version" );
   if (strchr( VERSION, ':')) { /* Assume VERSION is an RCS Revision string */
      char *dollar;

      strcat( buffer, strchr( VERSION, ':' ) + 1 );

      /* Test the strrchr() result itself: subtracting 1 first would turn
	 a NULL into a non-NULL garbage pointer and defeat the check. */
      dollar = strrchr( buffer, '$' );
      if (dollar)
	 pt = dollar - 1;		/* character before the '$' */
      else				/* Stripped RCS Revision string? */
	 pt = buffer + strlen( buffer ) - 1;

      if (*pt != ' ')			/* keep a non-blank last character */
	 ++pt;
      *pt = '\0';
   } else {			/* Assume VERSION is a number */
      strcat( buffer, " " VERSION );
   }

   return buffer;
}

#if 0
/* Polled arbitration: start arbitration and wait up to 500 one-millisecond
   polls for bit 0x02 of TMC_Status.  Returns 0 once that bit is seen,
   or 1 after making the bus idle when the polls run out. */
static int fdomain_arbitrate( void )
{
   int           status = 0;
   unsigned long polls_left;

#if EVERY_ACCESS
   printk( "fdomain_arbitrate()\n" );
#endif

   outb(0x00, port_base + SCSI_Cntl);              /* Disable data drivers */
   outb(adapter_mask, port_base + SCSI_Data_NoACK); /* Set our id bit */
   outb(0x04 | PARITY_MASK, port_base + TMC_Cntl); /* Start arbitration */

   for (polls_left = 500; polls_left; --polls_left) {
      status = inb(port_base + TMC_Status);        /* Read adapter status */
      if (status & 0x02)                           /* Arbitration complete */
         return 0;
      mdelay(1);                                   /* Wait one millisecond */
   }

   /* Timed out: make bus idle */
   fdomain_make_bus_idle();

#if EVERY_ACCESS
   printk( "Arbitration failed, status = %x\n", status );
#endif
#if ERRORS_ONLY
   printk( "scsi: <fdomain> Arbitration failed, status = %x\n", status );
#endif
   return 1;
}
#endif

/* Polled selection of `target`: assert select with our ID and the
   target's ID bit on the data lines, then poll SCSI_Status once per
   millisecond, up to 350 times, for bit 0 (busy).  Returns 0 when the
   target responds, or 1 after making the bus idle when it does not. */
static int fdomain_select( int target )
{
   int           status;
   unsigned long polls_left;
#if ERRORS_ONLY
   static int    first_failure_seen = 0;
#endif

   outb(0x82, port_base + SCSI_Cntl); /* Bus Enable + Select */
   outb(adapter_mask | (1 << target), port_base + SCSI_Data_NoACK);

   /* Stop arbitration and enable parity */
   outb(PARITY_MASK, port_base + TMC_Cntl);

   for (polls_left = 350; polls_left; --polls_left) {	/* 350 msec */
      status = inb(port_base + SCSI_Status); /* Read adapter status */
      if (status & 1) {			/* Busy asserted */
         /* Enable SCSI Bus (on error, should make bus idle with 0) */
         outb(0x80, port_base + SCSI_Cntl);
         return 0;
      }
      mdelay(1);			/* wait one msec */
   }

   /* No response: make bus idle */
   fdomain_make_bus_idle();
#if EVERY_ACCESS
   if (!target) printk( "Selection failed\n" );
#endif
#if ERRORS_ONLY
   if (!target) {
      /* Skip first failure for all chips. */
      if (first_failure_seen)
         printk( "scsi: <fdomain> Selection failed\n" );
      else
         first_failure_seen = 1;
   }
#endif
   return 1;
}

/* Finish the command in current_SC: clear in_command, write 0 to
   Interrupt_Cntl, idle the bus, store `error` as the result and call the
   command's completion callback.  Panics when no command is in progress
   or the command has no callback. */
static void my_done(int error)
{
   if (!in_command)
      panic( "scsi: <fdomain> my_done() called outside of command\n" );

   in_command = 0;
   outb(0x00, port_base + Interrupt_Cntl);
   fdomain_make_bus_idle();
   current_SC->result = error;

   if (!current_SC->scsi_done)
      panic( "scsi: <fdomain> current_SC->scsi_done() == NULL" );
   current_SC->scsi_done( current_SC );

#if DEBUG_RACE
   in_interrupt_flag = 0;
#endif
}

/* Interrupt handler.  Drives the single outstanding command (current_SC)
   through its phases, one step per interrupt:

     in_arbitration -> check that arbitration was won, start selection
     in_selection   -> check that the target answered (retrying with a
		       polled select), then enable the transfer phases
     in_other       -> service the phase indicated by SCSI_Status and
		       move data through the FIFO

   Returns IRQ_NONE when bit 0 of TMC_Status is clear or when no command
   is in progress, IRQ_HANDLED otherwise.  Completion goes through
   my_done(), called with the host lock held. */
static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
{
   unsigned long flags;
   int      status;
   int      done = 0;
   unsigned data_count;

   /* Check for other IRQ sources */
   if ((inb(port_base + TMC_Status) & 0x01) == 0)
      return IRQ_NONE;

   /* It is our IRQ */
   outb(0x00, port_base + Interrupt_Cntl);

   /* We usually have one spurious interrupt after each command.  Ignore it. */
   if (!in_command || !current_SC) {	/* Spurious interrupt */
#if EVERY_ACCESS
      printk( "Spurious interrupt, in_command = %d, current_SC = %x\n",
	      in_command, current_SC );
#endif
      return IRQ_NONE;
   }

   /* Abort calls my_done, so we do nothing here.  (The early return is
      disabled; processing continues below.) */
   if (current_SC->SCp.phase & aborted) {
#if DEBUG_ABORT
      printk( "scsi: <fdomain> Interrupt after abort, ignoring\n" );
#endif
      /*
      return IRQ_HANDLED; */
   }

#if DEBUG_RACE
   ++in_interrupt_flag;
#endif

   if (current_SC->SCp.phase & in_arbitration) {
      status = inb(port_base + TMC_Status);        /* Read adapter status */
      if (!(status & 0x02)) {
#if EVERY_ACCESS
	 printk( " AFAIL " );
#endif
	 spin_lock_irqsave(current_SC->device->host->host_lock, flags);
	 my_done( DID_BUS_BUSY << 16 );
	 spin_unlock_irqrestore(current_SC->device->host->host_lock, flags);
	 return IRQ_HANDLED;
      }
      current_SC->SCp.phase = in_selection;

      outb(0x40 | FIFO_COUNT, port_base + Interrupt_Cntl);

      outb(0x82, port_base + SCSI_Cntl); /* Bus Enable + Select */
      outb(adapter_mask | (1 << scmd_id(current_SC)), port_base + SCSI_Data_NoACK);

      /* Stop arbitration and enable parity */
      outb(0x10 | PARITY_MASK, port_base + TMC_Cntl);
#if DEBUG_RACE
      in_interrupt_flag = 0;
#endif
      return IRQ_HANDLED;
   } else if (current_SC->SCp.phase & in_selection) {
      status = inb(port_base + SCSI_Status);
      if (!(status & 0x01)) {
	 /* Try again, for slow devices */
	 if (fdomain_select( scmd_id(current_SC) )) {
#if EVERY_ACCESS
	    printk( " SFAIL " );
#endif
	    spin_lock_irqsave(current_SC->device->host->host_lock, flags);
	    my_done( DID_NO_CONNECT << 16 );
	    spin_unlock_irqrestore(current_SC->device->host->host_lock, flags);
	    return IRQ_HANDLED;
	 } else {
#if EVERY_ACCESS
	    printk( " AltSel " );
#endif
	    /* Stop arbitration and enable parity */
	    outb(0x10 | PARITY_MASK, port_base + TMC_Cntl);
	 }
      }
      current_SC->SCp.phase = in_other;
      outb(0x90 | FIFO_COUNT, port_base + Interrupt_Cntl);
      outb(0x80, port_base + SCSI_Cntl);
#if DEBUG_RACE
      in_interrupt_flag = 0;
#endif
      return IRQ_HANDLED;
   }

   /* current_SC->SCp.phase == in_other: this is the body of the routine */

   status = inb(port_base + SCSI_Status);

   if (status & 0x10) {	/* REQ */

      switch (status & 0x0e) {

      case 0x08:		/* COMMAND OUT */
	 outb(current_SC->cmnd[current_SC->SCp.sent_command++],
	      port_base + Write_SCSI_Data);
#if EVERY_ACCESS
	 printk( "CMD = %x,",
		 current_SC->cmnd[ current_SC->SCp.sent_command - 1] );
#endif
	 break;
      case 0x00:		/* DATA OUT -- tmc18c50/tmc18c30 only */
	 if (chip != tmc1800 && !current_SC->SCp.have_data_in) {
	    current_SC->SCp.have_data_in = -1;
	    outb(0xd0 | PARITY_MASK, port_base + TMC_Cntl);
	 }
	 break;
      case 0x04:		/* DATA IN -- tmc18c50/tmc18c30 only */
	 if (chip != tmc1800 && !current_SC->SCp.have_data_in) {
	    current_SC->SCp.have_data_in = 1;
	    outb(0x90 | PARITY_MASK, port_base + TMC_Cntl);
	 }
	 break;
      case 0x0c:		/* STATUS IN */
	 current_SC->SCp.Status = inb(port_base + Read_SCSI_Data);
#if EVERY_ACCESS
	 printk( "Status = %x, ", current_SC->SCp.Status );
#endif
#if ERRORS_ONLY
	 if (current_SC->SCp.Status
	     && current_SC->SCp.Status != 2
	     && current_SC->SCp.Status != 8) {
	    printk( "scsi: <fdomain> target = %d, command = %x, status = %x\n",
		    current_SC->device->id,
		    current_SC->cmnd[0],
		    current_SC->SCp.Status );
	 }
#endif
	 break;
      case 0x0a:		/* MESSAGE OUT */
	 outb(MESSAGE_REJECT, port_base + Write_SCSI_Data); /* Reject */
	 break;
      case 0x0e:		/* MESSAGE IN */
	 current_SC->SCp.Message = inb(port_base + Read_SCSI_Data);
#if EVERY_ACCESS
	 printk( "Message = %x, ", current_SC->SCp.Message );
#endif
	 /* A zero message byte ends the command. */
	 if (!current_SC->SCp.Message) ++done;
#if DEBUG_MESSAGES || EVERY_ACCESS
	 if (current_SC->SCp.Message) {
	    printk( "scsi: <fdomain> message = %x\n",
		    current_SC->SCp.Message );
	 }
#endif
	 break;
      }
   }

   /* On the tmc1800 the data direction is not taken from the bus phase
      (see the DATA OUT/DATA IN cases above); once the whole command has
      been sent, choose it from the command's DMA direction. */
   if (chip == tmc1800 && !current_SC->SCp.have_data_in
       && (current_SC->SCp.sent_command >= current_SC->cmd_len)) {

      if(current_SC->sc_data_direction == DMA_TO_DEVICE)
      {
	 current_SC->SCp.have_data_in = -1;
	 outb(0xd0 | PARITY_MASK, port_base + TMC_Cntl);
      }
      else
      {
	 current_SC->SCp.have_data_in = 1;
	 outb(0x90 | PARITY_MASK, port_base + TMC_Cntl);
      }
   }

   if (current_SC->SCp.have_data_in == -1) { /* DATA OUT */
      /* Keep filling while more than 512 bytes of FIFO space are free. */
      while ((data_count = FIFO_Size - inw(port_base + FIFO_Data_Count)) > 512) {
#if EVERY_ACCESS
	 printk( "DC=%d, ", data_count ) ;
#endif
	 if (data_count > current_SC->SCp.this_residual)
	       data_count = current_SC->SCp.this_residual;
	 if (data_count > 0) {
#if EVERY_ACCESS
	    printk( "%d OUT, ", data_count );
#endif
	    if (data_count == 1) {
	       outb(*current_SC->SCp.ptr++, port_base + Write_FIFO);
	       --current_SC->SCp.this_residual;
	    } else {
	       data_count >>= 1;	/* Number of words */
	       outsw(port_base + Write_FIFO, current_SC->SCp.ptr, data_count);
	       current_SC->SCp.ptr += 2 * data_count;
	       current_SC->SCp.this_residual -= 2 * data_count;
	    }
	 }
	 if (!current_SC->SCp.this_residual) {
	    /* Current segment exhausted: advance to the next
	       scatter-gather entry, or stop when none are left. */
	    if (current_SC->SCp.buffers_residual) {
	       --current_SC->SCp.buffers_residual;
	       ++current_SC->SCp.buffer;
	       current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
	       current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
	    } else
		  break;
	 }
      }
   }

   if (current_SC->SCp.have_data_in == 1) { /* DATA IN */
      /* Drain the FIFO until it reports no more data. */
      while ((data_count = inw(port_base + FIFO_Data_Count)) > 0) {
#if EVERY_ACCESS
	 printk( "DC=%d, ", data_count );
#endif
	 if (data_count > current_SC->SCp.this_residual)
	       data_count = current_SC->SCp.this_residual;
	 if (data_count) {
#if EVERY_ACCESS
	    printk( "%d IN, ", data_count );
#endif
	    if (data_count == 1) {
	       *current_SC->SCp.ptr++ = inb(port_base + Read_FIFO);
	       --current_SC->SCp.this_residual;
	    } else {
	       data_count >>= 1; /* Number of words */
	       insw(port_base + Read_FIFO, current_SC->SCp.ptr, data_count);
	       current_SC->SCp.ptr += 2 * data_count;
	       current_SC->SCp.this_residual -= 2 * data_count;
	    }
	 }
	 if (!current_SC->SCp.this_residual
	     && current_SC->SCp.buffers_residual) {
	    --current_SC->SCp.buffers_residual;
	    ++current_SC->SCp.buffer;
	    current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
	    current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
	 }
      }
   }

   if (done) {
#if EVERY_ACCESS
      printk( " ** IN DONE %d ** ", current_SC->SCp.have_data_in );
#endif

#if ERRORS_ONLY
      /* Diagnostic only: decode the sense data returned by a successful
	 REQUEST SENSE.  scsi_sglist() yields the scatterlist descriptor,
	 not the data, so the bytes must be reached through sg_virt();
	 bytes 2, 12 and 13 are read, hence the length check. */
      if (current_SC->cmnd[0] == REQUEST_SENSE && !current_SC->SCp.Status
	  && scsi_sg_count(current_SC)
	  && scsi_sglist(current_SC)->length >= 14) {
	 unsigned char *buf = sg_virt(scsi_sglist(current_SC));

	 if (buf[2] & 0x0f) {
	    unsigned char key;
	    unsigned char code;
	    unsigned char qualifier;

	    key = buf[2] & 0x0f;
	    code = buf[12];
	    qualifier = buf[13];

	    if (key != UNIT_ATTENTION
		&& !(key == NOT_READY
		     && code == 0x04
		     && (!qualifier || qualifier == 0x02 || qualifier == 0x01))
		&& !(key == ILLEGAL_REQUEST && (code == 0x25
						|| code == 0x24
						|| !code)))

		  printk( "scsi: <fdomain> REQUEST SENSE"
			  " Key = %x, Code = %x, Qualifier = %x\n",
			  key, code, qualifier );
	 }
      }
#endif
#if EVERY_ACCESS
      printk( "BEFORE MY_DONE. . ." );
#endif
      /* Result word: status in bits 0-7, message in bits 8-15,
	 DID_OK shifted into bits 16 and up. */
      spin_lock_irqsave(current_SC->device->host->host_lock, flags);
      my_done( (current_SC->SCp.Status & 0xff)
	       | ((current_SC->SCp.Message & 0xff) << 8) | (DID_OK << 16) );
      spin_unlock_irqrestore(current_SC->device->host->host_lock, flags);
#if EVERY_ACCESS
      printk( "RETURNING.\n" );
#endif

   } else {
      /* Not finished: re-arm Interrupt_Cntl for the next step. */
      if (current_SC->SCp.phase & disconnect) {
	 outb(0xd0 | FIFO_COUNT, port_base + Interrupt_Cntl);
	 outb(0x00, port_base + SCSI_Cntl);
      } else {
	 outb(0x90 | FIFO_COUNT, port_base + Interrupt_Cntl);
      }
   }
#if DEBUG_RACE
   in_interrupt_flag = 0;
#endif
   return IRQ_HANDLED;
}

static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt,
		void (*done)(struct scsi_cmnd *))
{
   if (in_command) {
      panic( "scsi: <fdomain> fdomain_16x0_queue() NOT REENTRANT!\n" );
   }
#if EVERY_ACCESS
   printk( "queue: target = %d cmnd = 0x%02x pieces = %d size = %u\n",
	   SCpnt->target,
	   *(unsigned char *)SCpnt->cmnd,
	   scsi_sg_count(SCpnt),
	   scsi_bufflen(SCpnt));
#endif

   fdomain_make_bus_idle();

   current_SC            = SCpnt; /* Save this for the done function */
   current_SC->scsi_done = done;

   /* Initialize static data */

   if (scsi_sg_count(current_SC)) {
	   current_SC->SCp.buffer = scsi_sglist(current_SC);
	   current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
	   current_SC->SCp.this_residual    = current_SC->SCp.buffer->length;
	   current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
   } else {
	   current_SC->SCp.ptr              = NULL;
	   current_SC->SCp.this_residual    = 0;
	   current_SC->SCp.buffer           = NULL;
	   current_SC->SCp.buffers_residual = 0;
   }

   current_SC->SCp.Status              = 0;
   current_SC->SCp.Message             = 0;
   current_SC->SCp.have_data_in        = 0;
   current_SC->SCp.sent_command        = 0;
   current_SC->SCp.phase               = in_arbitration;

   /* Start arbitration */
   outb(0x00, port_base + Interrupt_Cntl);
   outb(0x00, port_base + SCSI_Cntl);              /* Disable data drivers */
   outb(adapter_mask, port_base + SCSI_Data_NoACK); /* Set our id bit */
   ++in_command;
   outb(0x20, port_base + Interrupt_Cntl);
   outb(0x14 | PARITY_MASK, port_base + TMC_Cntl); /* Start arbitration */

   return 0;
}

#if DEBUG_ABORT
/* Debug helper (DEBUG_ABORT builds only): dump the state of a command,
   the interrupt controller registers read from ports 0x20/0x21 and
   0xa0/0xa1, and the adapter's status/configuration registers. */
static void print_info(struct scsi_cmnd *SCpnt)
{
   unsigned int imr;
   unsigned int irr;
   unsigned int isr;

   if (!SCpnt || !SCpnt->device || !SCpnt->device->host) {
      printk(KERN_WARNING "scsi: <fdomain> Cannot provide detailed information\n");
      return;
   }

   printk(KERN_INFO "%s\n", fdomain_16x0_info( SCpnt->device->host ) );
   print_banner(SCpnt->device->host);
   switch (SCpnt->SCp.phase) {
   case in_arbitration: printk("arbitration"); break;
   case in_selection:   printk("selection");   break;
   case in_other:       printk("other");       break;
   default:             printk("unknown");     break;
   }

   printk( " (%d), target = %d cmnd = 0x%02x pieces = %d size = %u\n",
	   SCpnt->SCp.phase,
	   SCpnt->device->id,
	   *(unsigned char *)SCpnt->cmnd,
	   scsi_sg_count(SCpnt),
	   scsi_bufflen(SCpnt));
   printk( "sent_command = %d, have_data_in = %d, timeout = %d\n",
	   SCpnt->SCp.sent_command,
	   SCpnt->SCp.have_data_in,
	   SCpnt->timeout );
#if DEBUG_RACE
   printk( "in_interrupt_flag = %d\n", in_interrupt_flag );
#endif

   /* Each 16-bit value below is built from two byte reads: the 0xa0/0xa1
      ports supply the high byte and the 0x20/0x21 ports the low byte. */
   imr = (inb( 0x0a1 ) << 8) + inb( 0x21 );
   outb( 0x0a, 0xa0 );
   irr = inb( 0xa0 ) << 8;
   outb( 0x0a, 0x20 );
   irr += inb( 0x20 );
   outb( 0x0b, 0xa0 );
   isr = inb( 0xa0 ) << 8;
   outb( 0x0b, 0x20 );
   isr += inb( 0x20 );

				/* Print out interesting information */
   printk( "IMR = 0x%04x", imr );
   if (imr & (1 << interrupt_level))
	 printk( " (masked)" );
   printk( ", IRR = 0x%04x, ISR = 0x%04x\n", irr, isr );

   printk( "SCSI Status      = 0x%02x\n", inb(port_base + SCSI_Status));
   printk( "TMC Status       = 0x%02x", inb(port_base + TMC_Status));
   /* Mask the value read, not the port number: bit 0 of TMC_Status is
      the bit the interrupt handler tests to see whether the IRQ is ours. */
   if (inb(port_base + TMC_Status) & 1)
	 printk( " (interrupt)" );
   printk( "\n" );
   printk("Interrupt Status = 0x%02x", inb(port_base + Interrupt_Status));
   if (inb(port_base + Interrupt_Status) & 0x08)
	 printk( " (enabled)" );
   printk( "\n" );
   if (chip == tmc18c50 || chip == tmc18c30) {
      printk("FIFO Status      = 0x%02x\n", inb(port_base + FIFO_Status));
      printk( "Int. Condition   = 0x%02x\n",
	      inb( port_base + Interrupt_Cond ) );
   }
   printk( "Configuration 1  = 0x%02x\n", inb( port_base + Configuration1 ) );
   if (chip == tmc18c50 || chip == tmc18c30)
	 printk( "Configuration 2  = 0x%02x\n",
		 inb( port_base + Configuration2 ) );
}
#endif

/* Error-handler abort entry point.  Returns FAILED when no command is in
   progress; otherwise idles the bus, marks current_SC as aborted,
   completes it with DID_ABORT and returns SUCCESS.  The SCpnt argument
   is used only for the debug dump. */
static int fdomain_16x0_abort(struct scsi_cmnd *SCpnt)
{
#if EVERY_ACCESS || ERRORS_ONLY || DEBUG_ABORT
   printk( "scsi: <fdomain> abort " );
#endif

   if (!in_command) {
#if EVERY_ACCESS || ERRORS_ONLY
      printk( " (not in command)\n" );
#endif
      return FAILED;
   }
   printk( "\n" );

#if DEBUG_ABORT
   print_info( SCpnt );
#endif

   fdomain_make_bus_idle();
   current_SC->SCp.phase |= aborted;
   current_SC->result     = DID_ABORT << 16;

   /* Aborts are not done well. . . */
   my_done(DID_ABORT << 16);

   return SUCCESS;
}

/* Bus reset entry point; also called with a NULL command during
   detection.  With local interrupts disabled, writes 1 then 0 to
   SCSI_Cntl with pauses in between, clears SCSI_Mode_Cntl and writes
   PARITY_MASK to TMC_Cntl.  Always returns SUCCESS; SCpnt is not used. */
int fdomain_16x0_bus_reset(struct scsi_cmnd *SCpnt)
{
   unsigned long irq_state;

   local_irq_save(irq_state);

   outb(1, port_base + SCSI_Cntl);
   do_pause( 2 );
   outb(0, port_base + SCSI_Cntl);
   do_pause( 115 );

   outb(0, port_base + SCSI_Mode_Cntl);
   outb(PARITY_MASK, port_base + TMC_Cntl);

   local_irq_restore(irq_state);

   return SUCCESS;
}

/* Report a BIOS-compatible disk geometry for a drive.

   info_array[0] = heads, info_array[1] = sectors, info_array[2] = cylinders.

   For BIOS 2.x, and 3.x with a minor version below 4, the geometry is
   copied from the drive parameter table in the BIOS RAM area (bios_mem).
   Otherwise it comes from the first partition table entry when that
   entry looks usable, or from fixed values chosen by the disk capacity.
   Always returns 0; info_array is left untouched when the device is not
   on SCSI_DISK0_MAJOR. */
static int fdomain_16x0_biosparam(struct scsi_device *sdev,
		struct block_device *bdev,
		sector_t capacity, int *info_array)
{
   int              drive;
   int		    size      = capacity;
   unsigned long    offset;
   struct drive_info {
      unsigned short cylinders;
      unsigned char  heads;
      unsigned char  sectors;
   } i;

   /* NOTES:
      The RAM area starts at 0x1f00 from the bios_base address.

      For BIOS Version 2.0:

      The drive parameter table seems to start at 0x1f30.
      The first byte's purpose is not known.
      Next is the cylinder, head, and sector information.
      The last 4 bytes appear to be the drive's size in sectors.
      The other bytes in the drive parameter table are unknown.
      If anyone figures them out, please send me mail, and I will
      update these notes.

      Tape drives do not get placed in this table.

      There is another table at 0x1fea:
      If the byte is 0x01, then the SCSI ID is not in use.
      If the byte is 0x18 or 0x48, then the SCSI ID is in use,
      although tapes don't seem to be in this table.  I haven't
      seen any other numbers (in a limited sample).

      0x1f2d is a drive count (i.e., not including tapes)

      The table at 0x1fcc are I/O ports addresses for the various
      operations.  I calculate these by hand in this driver code.



      For the ISA-200S version of BIOS Version 2.0:

      The drive parameter table starts at 0x1f33.

      WARNING: Assume that the table entry is 25 bytes long.  Someone needs
      to check this for the Quantum ISA-200S card.



      For BIOS Version 3.2:

      The drive parameter table starts at 0x1f70.  Each entry is
      0x0a bytes long.  Heads are one less than we need to report.
    */

   if (MAJOR(bdev->bd_dev) != SCSI_DISK0_MAJOR) {
      printk("scsi: <fdomain> fdomain_16x0_biosparam: too many disks");
      return 0;
   }
   /* The drive index is the minor number divided by 16. */
   drive = MINOR(bdev->bd_dev) >> 4;

   /* NOTE(review): for large drive indexes the table offsets below can
      pass the 0x2000 bytes that fdomain_isa_detect() maps at bios_base;
      confirm whether the index needs an upper bound. */

   if (bios_major == 2) {
      switch (Quantum) {
      case 2:			/* ISA_200S */
				/* The value of 25 has never been verified.
				   It should probably be 15. */
	 offset = 0x1f33 + drive * 25;
	 break;
      case 3:			/* ISA_250MG */
	 offset = 0x1f36 + drive * 15;
	 break;
      case 4:			/* ISA_200S (another one) */
	 offset = 0x1f34 + drive * 15;
	 break;
      default:
	 offset = 0x1f31 + drive * 25;
	 break;
      }
      memcpy_fromio( &i, bios_mem + offset, sizeof( struct drive_info ) );
      info_array[0] = i.heads;
      info_array[1] = i.sectors;
      info_array[2] = i.cylinders;
   } else if (bios_major == 3
	      && bios_minor >= 0
	      && bios_minor < 4) { /* 3.0 and 3.2 BIOS */
      memcpy_fromio( &i, bios_mem + 0x1f71 + drive * 10,
		     sizeof( struct drive_info ) );
      info_array[0] = i.heads + 1;
      info_array[1] = i.sectors;
      info_array[2] = i.cylinders;
   } else {			/* 3.4 BIOS (and up?) */
      /* This algorithm was provided by Future Domain (much thanks!). */
      unsigned char *p = scsi_bios_ptable(bdev);

      if (p && p[65] == 0xaa && p[64] == 0x55 /* Partition table valid */
	  && p[4]			    /* Partition type */
	  && (p[6] & 0x3f)) {		    /* Non-zero sector count */

	 /* The partition table layout is as follows:

	    Start: 0x1b3h
	    Offset: 0 = partition status
		    1 = starting head
		    2 = starting sector and cylinder (word, encoded)
		    4 = partition type
		    5 = ending head
		    6 = ending sector and cylinder (word, encoded)
		    8 = starting absolute sector (double word)
		    c = number of sectors (double word)
	    Signature: 0x1fe = 0x55aa

	    So, this algorithm assumes:
	    1) the first partition table is in use,
	    2) the data in the first entry is correct, and
	    3) partitions never divide cylinders

	    Note that (1) may be FALSE for NetBSD (and other BSD flavors),
	    as well as for Linux.  Note also, that Linux doesn't pay any
	    attention to the fields that are used by this algorithm -- it
	    only uses the absolute sector data.  Recent versions of Linux's
	    fdisk(1) will fill this data in correctly, and forthcoming
	    versions will check for consistency.

	    Checking for a non-zero partition type is not part of the
	    Future Domain algorithm, but it seemed to be a reasonable thing
	    to do, especially in the Linux and BSD worlds.

	    The ending-sector field must also be non-zero: it becomes the
	    divisor in the cylinder computation below, so an entry with a
	    zero sector count is treated as unusable and the size-based
	    geometry is used instead. */

	 info_array[0] = p[5] + 1;	    /* heads */
	 info_array[1] = p[6] & 0x3f;	    /* sectors */
      } else {

	 /* Note that this new method guarantees that there will always be
	    less than 1024 cylinders on a platter.  This is good for drives
	    up to approximately 7.85GB (where 1GB = 1024 * 1024 kB). */

	 if ((unsigned int)size >= 0x7e0000U) {
	    info_array[0] = 0xff; /* heads   = 255 */
	    info_array[1] = 0x3f; /* sectors =  63 */
	 } else if ((unsigned int)size >= 0x200000U) {
	    info_array[0] = 0x80; /* heads   = 128 */
	    info_array[1] = 0x3f; /* sectors =  63 */
	 } else {
	    info_array[0] = 0x40; /* heads   =  64 */
	    info_array[1] = 0x20; /* sectors =  32 */
	 }
      }
				/* For both methods, compute the cylinders */
      info_array[2] = (unsigned int)size / (info_array[0] * info_array[1] );
      kfree(p);
   }

   return 0;
}

/* Template .release entry point: free the host's IRQ and I/O region when
   they are set, and drop the PCI device reference for a PCI board.
   Always returns 0. */
static int fdomain_16x0_release(struct Scsi_Host *shpnt)
{
	if (shpnt->irq != 0) {
		free_irq(shpnt->irq, shpnt);
	}

	if (shpnt->io_port != 0 && shpnt->n_io_port != 0) {
		release_region(shpnt->io_port, shpnt->n_io_port);
	}

	if (PCI_bus) {
		pci_dev_put(PCI_dev);
	}

	return 0;
}

/* Host template for this driver: names, entry points and queueing limits. */
struct scsi_host_template fdomain_driver_template = {
	.module			= THIS_MODULE,
	.name			= "fdomain",
	.proc_name		= "fdomain",
	.detect			= fdomain_16x0_detect,
	.info			= fdomain_16x0_info,
	.queuecommand		= fdomain_16x0_queue,
	.eh_abort_handler	= fdomain_16x0_abort,
	.eh_bus_reset_handler	= fdomain_16x0_bus_reset,
	.bios_param		= fdomain_16x0_biosparam,
	.release		= fdomain_16x0_release,
	/* One command at a time: fdomain_16x0_queue() panics if called
	   while a command is still in progress. */
	.can_queue		= 1,
	/* Initial value only; __fdomain_16x0_detect() overwrites this_id. */
	.this_id		= 6,
	.sg_tablesize		= 64,
	.cmd_per_lun		= 1,
	.use_clustering		= DISABLE_CLUSTERING,
};

#ifndef PCMCIA
#ifdef CONFIG_PCI

/* PCI IDs exported through MODULE_DEVICE_TABLE: the same vendor/device
   pair that fdomain_pci_bios_detect() searches for, with any subsystem
   IDs.  The empty entry terminates the table. */
static struct pci_device_id fdomain_pci_tbl[] __devinitdata = {
	{ PCI_VENDOR_ID_FD, PCI_DEVICE_ID_FD_36C70,
	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
	{ }
};
MODULE_DEVICE_TABLE(pci, fdomain_pci_tbl);
#endif
#define driver_template fdomain_driver_template
#include "scsi_module.c"

#endif
y a role for bootstrapping * hotplugged processors. * * zoneinfo_show() and maybe other functions do * not check if the processor is online before following the pageset pointer. * Other parts of the kernel may not check if the zone is available. */ static struct per_cpu_pageset boot_pageset[NR_CPUS]; /* * Dynamically allocate memory for the * per cpu pageset array in struct zone. */ static int __cpuinit process_zones(int cpu) { struct zone *zone, *dzone; for_each_zone(zone) { if (!populated_zone(zone)) continue; zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), GFP_KERNEL, cpu_to_node(cpu)); if (!zone_pcp(zone, cpu)) goto bad; setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); if (percpu_pagelist_fraction) setup_pagelist_highmark(zone_pcp(zone, cpu), (zone->present_pages / percpu_pagelist_fraction)); } return 0; bad: for_each_zone(dzone) { if (dzone == zone) break; kfree(zone_pcp(dzone, cpu)); zone_pcp(dzone, cpu) = NULL; } return -ENOMEM; } static inline void free_zone_pagesets(int cpu) { struct zone *zone; for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); /* Free per_cpu_pageset if it is slab allocated */ if (pset != &boot_pageset[cpu]) kfree(pset); zone_pcp(zone, cpu) = NULL; } } static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int cpu = (long)hcpu; int ret = NOTIFY_OK; switch (action) { case CPU_UP_PREPARE: if (process_zones(cpu)) ret = NOTIFY_BAD; break; case CPU_UP_CANCELED: case CPU_DEAD: free_zone_pagesets(cpu); break; default: break; } return ret; } static struct notifier_block __cpuinitdata pageset_notifier = { &pageset_cpuup_callback, NULL, 0 }; void __init setup_per_cpu_pageset(void) { int err; /* Initialize per_cpu_pageset for cpu 0. 
* A cpuup callback will do this for every cpu * as it comes online */ err = process_zones(smp_processor_id()); BUG_ON(err); register_cpu_notifier(&pageset_notifier); } #endif static __meminit int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { int i; struct pglist_data *pgdat = zone->zone_pgdat; size_t alloc_size; /* * The per-page waitqueue mechanism uses hashed waitqueues * per zone. */ zone->wait_table_hash_nr_entries = wait_table_hash_nr_entries(zone_size_pages); zone->wait_table_bits = wait_table_bits(zone->wait_table_hash_nr_entries); alloc_size = zone->wait_table_hash_nr_entries * sizeof(wait_queue_head_t); if (system_state == SYSTEM_BOOTING) { zone->wait_table = (wait_queue_head_t *) alloc_bootmem_node(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory * via memory hot-add. * But it may be the case that a new node was hot-added. In * this case vmalloc() will not be able to use this new node's * memory - this wait_table must be initialized to use this new * node itself as well. * To use this new node's memory, further consideration will be * necessary. */ zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size); } if (!zone->wait_table) return -ENOMEM; for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) init_waitqueue_head(zone->wait_table + i); return 0; } static __meminit void zone_pcp_init(struct zone *zone) { int cpu; unsigned long batch = zone_batchsize(zone); for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_NUMA /* Early boot. 
Slab allocator not functional yet */ zone_pcp(zone, cpu) = &boot_pageset[cpu]; setup_pageset(&boot_pageset[cpu],0); #else setup_pageset(zone_pcp(zone,cpu), batch); #endif } if (zone->present_pages) printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", zone->name, zone->present_pages, batch); } __meminit int init_currently_empty_zone(struct zone *zone, unsigned long zone_start_pfn, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; int ret; ret = zone_wait_table_init(zone, size); if (ret) return ret; pgdat->nr_zones = zone_idx(zone) + 1; zone->zone_start_pfn = zone_start_pfn; memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); zone_init_free_lists(pgdat, zone, zone->spanned_pages); return 0; } #ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* * Basic iterator support. Return the first range of PFNs for a node * Note: nid == MAX_NUMNODES returns first region regardless of node */ static int __init first_active_region_index_in_nid(int nid) { int i; for (i = 0; i < nr_nodemap_entries; i++) if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) return i; return -1; } /* * Basic iterator support. Return the next active range of PFNs for a node * Note: nid == MAX_NUMNODES returns next region regardles of node */ static int __init next_active_region_index_in_nid(int index, int nid) { for (index = index + 1; index < nr_nodemap_entries; index++) if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) return index; return -1; } #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID /* * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 
* Architectures may implement their own version but if add_active_range() * was used and there are no special requirements, this is a convenient * alternative */ int __init early_pfn_to_nid(unsigned long pfn) { int i; for (i = 0; i < nr_nodemap_entries; i++) { unsigned long start_pfn = early_node_map[i].start_pfn; unsigned long end_pfn = early_node_map[i].end_pfn; if (start_pfn <= pfn && pfn < end_pfn) return early_node_map[i].nid; } return 0; } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ for (i = first_active_region_index_in_nid(nid); i != -1; \ i = next_active_region_index_in_nid(i, nid)) /** * free_bootmem_with_active_regions - Call free_bootmem_node for each active range * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node * * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this * this function may be used instead of calling free_bootmem() manually. */ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) { int i; for_each_active_range_index_in_nid(i, nid) { unsigned long size_pages = 0; unsigned long end_pfn = early_node_map[i].end_pfn; if (early_node_map[i].start_pfn >= max_low_pfn) continue; if (end_pfn > max_low_pfn) end_pfn = max_low_pfn; size_pages = end_pfn - early_node_map[i].start_pfn; free_bootmem_node(NODE_DATA(early_node_map[i].nid), PFN_PHYS(early_node_map[i].start_pfn), size_pages << PAGE_SHIFT); } } /** * sparse_memory_present_with_active_regions - Call memory_present for each active range * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 
* * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this * function may be used instead of calling memory_present() manually. */ void __init sparse_memory_present_with_active_regions(int nid) { int i; for_each_active_range_index_in_nid(i, nid) memory_present(early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); } /** * push_node_boundaries - Push node boundaries to at least the requested boundary * @nid: The nid of the node to push the boundary for * @start_pfn: The start pfn of the node * @end_pfn: The end pfn of the node * * In reserve-based hot-add, mem_map is allocated that is unused until hotadd * time. Specifically, on x86_64, SRAT will report ranges that can potentially * be hotplugged even though no physical memory exists. This function allows * an arch to push out the node boundaries so mem_map is allocated that can * be used later. */ #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE void __init push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", nid, start_pfn, end_pfn); /* Initialise the boundary for this node if necessary */ if (node_boundary_end_pfn[nid] == 0) node_boundary_start_pfn[nid] = -1UL; /* Update the boundaries */ if (node_boundary_start_pfn[nid] > start_pfn) node_boundary_start_pfn[nid] = start_pfn; if (node_boundary_end_pfn[nid] < end_pfn) node_boundary_end_pfn[nid] = end_pfn; } /* If necessary, push the node boundary out for reserve hotadd */ static void __init account_node_boundary(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", nid, *start_pfn, *end_pfn); /* Return if boundary information has not been provided */ if (node_boundary_end_pfn[nid] == 0) return; /* Check the boundaries and update if necessary */ if (node_boundary_start_pfn[nid] < *start_pfn) *start_pfn 
= node_boundary_start_pfn[nid]; if (node_boundary_end_pfn[nid] > *end_pfn) *end_pfn = node_boundary_end_pfn[nid]; } #else void __init push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) {} static void __init account_node_boundary(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) {} #endif /** * get_pfn_range_for_nid - Return the start and end page frames for a node * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. * @start_pfn: Passed by reference. On return, it will have the node start_pfn. * @end_pfn: Passed by reference. On return, it will have the node end_pfn. * * It returns the start and end page frame of a node based on information * provided by an arch calling add_active_range(). If called for a node * with no available memory, a warning is printed and the start and end * PFNs will be 0. */ void __init get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { int i; *start_pfn = -1UL; *end_pfn = 0; for_each_active_range_index_in_nid(i, nid) { *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); } if (*start_pfn == -1UL) { printk(KERN_WARNING "Node %u active with no memory\n", nid); *start_pfn = 0; } /* Push the node boundaries out if requested */ account_node_boundary(nid, start_pfn, end_pfn); } /* * Return the number of pages a zone spans in a node, including holes * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() */ unsigned long __init zone_spanned_pages_in_node(int nid, unsigned long zone_type, unsigned long *ignored) { unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; /* Get the start and end of the node and zone */ get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; /* Check that 
this node has pages within the zone's required range */ if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) return 0; /* Move the zone boundaries inside the node if necessary */ zone_end_pfn = min(zone_end_pfn, node_end_pfn); zone_start_pfn = max(zone_start_pfn, node_start_pfn); /* Return the spanned pages */ return zone_end_pfn - zone_start_pfn; } /* * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, * then all holes in the requested range will be accounted for. */ unsigned long __init __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { int i = 0; unsigned long prev_end_pfn = 0, hole_pages = 0; unsigned long start_pfn; /* Find the end_pfn of the first active range of pfns in the node */ i = first_active_region_index_in_nid(nid); if (i == -1) return 0; /* Account for ranges before physical memory on this node */ if (early_node_map[i].start_pfn > range_start_pfn) hole_pages = early_node_map[i].start_pfn - range_start_pfn; prev_end_pfn = early_node_map[i].start_pfn; /* Find all holes for the zone within the node */ for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { /* No need to continue if prev_end_pfn is outside the zone */ if (prev_end_pfn >= range_end_pfn) break; /* Make sure the end of the zone is not within the hole */ start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); prev_end_pfn = max(prev_end_pfn, range_start_pfn); /* Update the hole size cound and move on */ if (start_pfn > range_start_pfn) { BUG_ON(prev_end_pfn > start_pfn); hole_pages += start_pfn - prev_end_pfn; } prev_end_pfn = early_node_map[i].end_pfn; } /* Account for ranges past physical memory on this node */ if (range_end_pfn > prev_end_pfn) hole_pages += range_end_pfn - max(range_start_pfn, prev_end_pfn); return hole_pages; } /** * absent_pages_in_range - Return number of page frames in holes within a range * @start_pfn: The start PFN to start searching for holes * @end_pfn: The end PFN to 
stop searching for holes
 *
 * It returns the number of page frames in memory holes within a range.
 */
unsigned long __init absent_pages_in_range(unsigned long start_pfn,
							unsigned long end_pfn)
{
	/* Passing MAX_NUMNODES as the node id requests holes on all nodes */
	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
}

/*
 * Return the number of page frames in holes in a zone on a node.
 *
 * The zone's PFN limits are taken from arch_zone_lowest_possible_pfn[] and
 * arch_zone_highest_possible_pfn[] and clipped to the PFN range reported
 * for the node by get_pfn_range_for_nid(). The third parameter is not
 * referenced in this variant.
 */
unsigned long __init zone_absent_pages_in_node(int nid,
					unsigned long zone_type,
					unsigned long *ignored)
{
	unsigned long node_start_pfn, node_end_pfn;
	unsigned long zone_start_pfn, zone_end_pfn;

	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
	/* Restrict the zone to the part that lies inside the node */
	zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
							node_start_pfn);
	zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
							node_end_pfn);

	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
}

/*
 * NOTE(review): the conditional opened by the matching #if/#ifdef is outside
 * this excerpt - presumably CONFIG_ARCH_POPULATES_NODE_MAP; confirm.
 * In this branch the caller-supplied arrays are used directly.
 */
#else
/* Spanned pages of a zone: read straight from zones_size[] */
static inline unsigned long zone_spanned_pages_in_node(int nid,
					unsigned long zone_type,
					unsigned long *zones_size)
{
	return zones_size[zone_type];
}

/* Absent pages of a zone: zholes_size[] entry, or 0 if no array was given */
static inline unsigned long zone_absent_pages_in_node(int nid,
						unsigned long zone_type,
						unsigned long *zholes_size)
{
	if (!zholes_size)
		return 0;

	return zholes_size[zone_type];
}
#endif

/*
 * Fill in pgdat->node_spanned_pages (sum of the spanned pages of every zone)
 * and pgdat->node_present_pages (spanned pages minus the absent pages of
 * every zone), then log the present page count at KERN_DEBUG.
 */
static void __init calculate_node_totalpages(struct pglist_data *pgdat,
		unsigned long *zones_size, unsigned long *zholes_size)
{
	unsigned long realtotalpages, totalpages = 0;
	enum zone_type i;

	for (i = 0; i < MAX_NR_ZONES; i++)
		totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
								zones_size);
	pgdat->node_spanned_pages = totalpages;

	realtotalpages = totalpages;
	for (i = 0; i < MAX_NR_ZONES; i++)
		realtotalpages -=
			zone_absent_pages_in_node(pgdat->node_id, i,
								zholes_size);
	pgdat->node_present_pages = realtotalpages;
	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
							realtotalpages);
}

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * For every zone of the node this computes the spanned size and the
 * "real" size (spanned minus holes, minus the pages needed for the zone's
 * struct page array, minus dma_reserve for ZONE_DMA), adds the real size to
 * the global nr_kernel_pages / nr_all_pages counters, initialises the zone's
 * locks, lists and counters, and - for non-empty zones - calls
 * init_currently_empty_zone() with the zone's first PFN. Zones are laid out
 * back to back starting at pgdat->node_start_pfn.
 */
static void __meminit free_area_init_core(struct pglist_data *pgdat,
		unsigned long *zones_size, unsigned long *zholes_size)
{
	enum zone_type j;
	int nid = pgdat->node_id;
	unsigned long zone_start_pfn = pgdat->node_start_pfn;
	int ret;

	pgdat_resize_init(pgdat);
	pgdat->nr_zones = 0;
	init_waitqueue_head(&pgdat->kswapd_wait);
	pgdat->kswapd_max_order = 0;

	for (j = 0; j < MAX_NR_ZONES; j++) {
		struct zone *zone = pgdat->node_zones + j;
		unsigned long size, realsize, memmap_pages;

		size = zone_spanned_pages_in_node(nid, j, zones_size);
		realsize = size - zone_absent_pages_in_node(nid, j,
								zholes_size);

		/*
		 * Adjust realsize so that it accounts for how much memory
		 * is used by this zone for memmap. This affects the watermark
		 * and per-cpu initialisations
		 */
		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
		if (realsize >= memmap_pages) {
			realsize -= memmap_pages;
			printk(KERN_DEBUG
				" %s zone: %lu pages used for memmap\n",
				zone_names[j], memmap_pages);
		} else
			/* memmap would not fit: warn and leave realsize as is */
			printk(KERN_WARNING
				" %s zone: %lu pages exceeds realsize %lu\n",
				zone_names[j], memmap_pages, realsize);

		/* Account for reserved DMA pages */
		if (j == ZONE_DMA && realsize > dma_reserve) {
			realsize -= dma_reserve;
			printk(KERN_DEBUG " DMA zone: %lu pages reserved\n",
								dma_reserve);
		}

		/* Highmem zones count towards nr_all_pages only */
		if (!is_highmem_idx(j))
			nr_kernel_pages += realsize;
		nr_all_pages += realsize;

		zone->spanned_pages = size;
		zone->present_pages = realsize;
#ifdef CONFIG_NUMA
		zone->node = nid;
		/* Both thresholds are percentages of the zone's real size */
		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
						/ 100;
		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
		zone->name = zone_names[j];
		spin_lock_init(&zone->lock);
		spin_lock_init(&zone->lru_lock);
		zone_seqlock_init(zone);
		zone->zone_pgdat = pgdat;
		zone->free_pages = 0;

		zone->prev_priority = DEF_PRIORITY;

		zone_pcp_init(zone);
		INIT_LIST_HEAD(&zone->active_list);
		INIT_LIST_HEAD(&zone->inactive_list);
		zone->nr_scan_active = 0;
		zone->nr_scan_inactive = 0;
		zone->nr_active = 0;
		zone->nr_inactive = 0;
		zap_zone_vm_stats(zone);
		atomic_set(&zone->reclaim_in_progress, 0);
		/* Empty zones get no further setup and occupy no PFN space */
		if (!size)
			continue;

		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
		BUG_ON(ret);
		/* The next zone starts right after this one */
		zone_start_pfn += size;
	}
}

/*
 * Allocate the node's struct page array (node_mem_map) when
 * CONFIG_FLAT_NODE_MEM_MAP is set and the node spans at least one page.
 * With CONFIG_FLATMEM the global mem_map is additionally pointed at node 0's
 * map. Without CONFIG_FLAT_NODE_MEM_MAP this function only performs the
 * empty-node check.
 */
static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{
	/* Skip empty nodes */
	if (!pgdat->node_spanned_pages)
		return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP
	/* ia64 gets its own node_mem_map, before this, without bootmem */
	if (!pgdat->node_mem_map) {
		unsigned long size, start, end;
		struct page *map;

		/*
		 * The zone's endpoints aren't required to be MAX_ORDER
		 * aligned but the node_mem_map endpoints must be in order
		 * for the buddy allocator to function correctly.
		 */
		start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
		end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
		end = ALIGN(end, MAX_ORDER_NR_PAGES);
		size =  (end - start) * sizeof(struct page);
		/* Try alloc_remap() first, fall back to the bootmem allocator */
		map = alloc_remap(pgdat->node_id, size);
		if (!map)
			map = alloc_bootmem_node(pgdat, size);
		/* Skip the entries added in front by the downward alignment */
		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
	}
#ifdef CONFIG_FLATMEM
	/*
	 * With no DISCONTIG, the global mem_map is just set as node 0's
	 */
	if (pgdat == NODE_DATA(0)) {
		mem_map = NODE_DATA(0)->node_mem_map;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
		/*
		 * If the first entry does not already translate to the node's
		 * first PFN, rebase mem_map by node_start_pfn.
		 */
		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
			mem_map -= pgdat->node_start_pfn;
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
	}
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}

/*
 * Initialise one node: record its id and first PFN in pgdat, compute the
 * spanned/present page totals, allocate the node's mem_map and finally set
 * up every zone. The three steps run in exactly this order.
 */
void __meminit free_area_init_node(int nid, struct pglist_data *pgdat,
		unsigned long *zones_size, unsigned long node_start_pfn,
		unsigned long *zholes_size)
{
	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;
	calculate_node_totalpages(pgdat, zones_size, zholes_size);

	alloc_node_mem_map(pgdat);

	free_area_init_core(pgdat, zones_size, zholes_size);
}

#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/**
 * add_active_range - Register a range of PFNs backed by physical memory
 * @nid: The node ID the range resides on
 * @start_pfn: The start PFN of the available physical memory
 * @end_pfn: The end PFN of the available physical memory
 *
 * These ranges are stored in an early_node_map[] and later used by
 * free_area_init_nodes() to
calculate zone sizes and holes. If the * range spans a memory hole, it is up to the architecture to ensure * the memory is not freed by the bootmem allocator. If possible * the range being registered will be merged with existing ranges. */ void __init add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { int i; printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " "%d entries of %d used\n", nid, start_pfn, end_pfn, nr_nodemap_entries, MAX_ACTIVE_REGIONS); /* Merge with existing active regions if possible */ for (i = 0; i < nr_nodemap_entries; i++) { if (early_node_map[i].nid != nid) continue; /* Skip if an existing region covers this new one */ if (start_pfn >= early_node_map[i].start_pfn && end_pfn <= early_node_map[i].end_pfn) return; /* Merge forward if suitable */ if (start_pfn <= early_node_map[i].end_pfn && end_pfn > early_node_map[i].end_pfn) { early_node_map[i].end_pfn = end_pfn; return; } /* Merge backward if suitable */ if (start_pfn < early_node_map[i].end_pfn && end_pfn >= early_node_map[i].start_pfn) { early_node_map[i].start_pfn = start_pfn; return; } } /* Check that early_node_map is large enough */ if (i >= MAX_ACTIVE_REGIONS) { printk(KERN_CRIT "More than %d memory regions, truncating\n", MAX_ACTIVE_REGIONS); return; } early_node_map[i].nid = nid; early_node_map[i].start_pfn = start_pfn; early_node_map[i].end_pfn = end_pfn; nr_nodemap_entries = i + 1; } /** * shrink_active_range - Shrink an existing registered range of PFNs * @nid: The node id the range is on that should be shrunk * @old_end_pfn: The old end PFN of the range * @new_end_pfn: The new PFN of the range * * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. * The map is kept at the end physical page range that has already been * registered with add_active_range(). This function allows an arch to shrink * an existing registered range. 
*/ void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, unsigned long new_end_pfn) { int i; /* Find the old active region end and shrink */ for_each_active_range_index_in_nid(i, nid) if (early_node_map[i].end_pfn == old_end_pfn) { early_node_map[i].end_pfn = new_end_pfn; break; } } /** * remove_all_active_ranges - Remove all currently registered regions * * During discovery, it may be found that a table like SRAT is invalid * and an alternative discovery method must be used. This function removes * all currently registered regions. */ void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ static int __init cmp_node_active_region(const void *a, const void *b) { struct node_active_region *arange = (struct node_active_region *)a; struct node_active_region *brange = (struct node_active_region *)b; /* Done this way to avoid overflows */ if (arange->start_pfn > brange->start_pfn) return 1; if (arange->start_pfn < brange->start_pfn) return -1; return 0; } /* sort the node_map by start_pfn */ static void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), cmp_node_active_region, NULL); } /* Find the lowest pfn for a node. 
This depends on a sorted early_node_map */ unsigned long __init find_min_pfn_for_node(unsigned long nid) { int i; /* Regions in the early_node_map can be in any order */ sort_node_map(); /* Assuming a sorted map, the first range found has the starting pfn */ for_each_active_range_index_in_nid(i, nid) return early_node_map[i].start_pfn; printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); return 0; } /** * find_min_pfn_with_active_regions - Find the minimum PFN registered * * It returns the minimum PFN based on information provided via * add_active_range(). */ unsigned long __init find_min_pfn_with_active_regions(void) { return find_min_pfn_for_node(MAX_NUMNODES); } /** * find_max_pfn_with_active_regions - Find the maximum PFN registered * * It returns the maximum PFN based on information provided via * add_active_range(). */ unsigned long __init find_max_pfn_with_active_regions(void) { int i; unsigned long max_pfn = 0; for (i = 0; i < nr_nodemap_entries; i++) max_pfn = max(max_pfn, early_node_map[i].end_pfn); return max_pfn; } /** * free_area_init_nodes - Initialise all pg_data_t and zone data * @max_zone_pfn: an array of max PFNs for each zone * * This will call free_area_init_node() for each active node in the system. * Using the page ranges provided by add_active_range(), the size of each * zone in each node and their holes is calculated. If the maximum PFN * between two adjacent zones match, it is assumed that the zone is empty. * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed * that arch_max_dma32_pfn has no pages. It is also assumed that a zone * starts where the previous one ended. For example, ZONE_DMA32 starts * at arch_max_dma_pfn. 
*/ void __init free_area_init_nodes(unsigned long *max_zone_pfn) { unsigned long nid; enum zone_type i; /* Record where the zone boundaries are */ memset(arch_zone_lowest_possible_pfn, 0, sizeof(arch_zone_lowest_possible_pfn)); memset(arch_zone_highest_possible_pfn, 0, sizeof(arch_zone_highest_possible_pfn)); arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions(); arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; for (i = 1; i < MAX_NR_ZONES; i++) { arch_zone_lowest_possible_pfn[i] = arch_zone_highest_possible_pfn[i-1]; arch_zone_highest_possible_pfn[i] = max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); } /* Print out the zone ranges */ printk("Zone PFN ranges:\n"); for (i = 0; i < MAX_NR_ZONES; i++) printk(" %-8s %8lu -> %8lu\n", zone_names[i], arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); /* Initialise every node */ for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid, pgdat, NULL, find_min_pfn_for_node(nid), NULL); } } #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ /** * set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved * * The per-cpu batchsize and zone watermarks are determined by present_pages. * In the DMA zone, a significant percentage may be consumed by kernel image * and other unfreeable allocations which can skew the watermarks badly. This * function may optionally be used to account for unfreeable pages in the * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and * smaller per-cpu batchsize. 
*/ void __init set_dma_reserve(unsigned long new_dma_reserve) { dma_reserve = new_dma_reserve; } #ifndef CONFIG_NEED_MULTIPLE_NODES static bootmem_data_t contig_bootmem_data; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; EXPORT_SYMBOL(contig_page_data); #endif void __init free_area_init(unsigned long *zones_size) { free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (unsigned long)hcpu; if (action == CPU_DEAD) { local_irq_disable(); __drain_pages(cpu); vm_events_fold_cpu(cpu); local_irq_enable(); refresh_cpu_vm_stats(cpu); } return NOTIFY_OK; } void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); } /* * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio * or min_free_kbytes changes. */ static void calculate_totalreserve_pages(void) { struct pglist_data *pgdat; unsigned long reserve_pages = 0; enum zone_type i, j; for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; unsigned long max = 0; /* Find valid and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { if (zone->lowmem_reserve[j] > max) max = zone->lowmem_reserve[j]; } /* we treat pages_high as reserved pages. */ max += zone->pages_high; if (max > zone->present_pages) max = zone->present_pages; reserve_pages += max; } } totalreserve_pages = reserve_pages; } /* * setup_per_zone_lowmem_reserve - called whenever * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone * has a correct pages reserved value, so an adequate number of * pages are left in the zone after a successful __alloc_pages(). 
*/ static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; enum zone_type j, idx; for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long present_pages = zone->present_pages; zone->lowmem_reserve[j] = 0; idx = j; while (idx) { struct zone *lower_zone; idx--; if (sysctl_lowmem_reserve_ratio[idx] < 1) sysctl_lowmem_reserve_ratio[idx] = 1; lower_zone = pgdat->node_zones + idx; lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx]; present_pages += lower_zone->present_pages; } } } /* update totalreserve_pages */ calculate_totalreserve_pages(); } /** * setup_per_zone_pages_min - called when min_free_kbytes changes. * * Ensures that the pages_{min,low,high} values for each zone are set correctly * with respect to min_free_kbytes. */ void setup_per_zone_pages_min(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; struct zone *zone; unsigned long flags; /* Calculate total number of !ZONE_HIGHMEM pages */ for_each_zone(zone) { if (!is_highmem(zone)) lowmem_pages += zone->present_pages; } for_each_zone(zone) { u64 tmp; spin_lock_irqsave(&zone->lru_lock, flags); tmp = (u64)pages_min * zone->present_pages; do_div(tmp, lowmem_pages); if (is_highmem(zone)) { /* * __GFP_HIGH and PF_MEMALLOC allocations usually don't * need highmem pages, so cap pages_min to a small * value here. * * The (pages_high-pages_low) and (pages_low-pages_min) * deltas controls asynch page reclaim, and so should * not be capped for highmem. */ int min_pages; min_pages = zone->present_pages / 1024; if (min_pages < SWAP_CLUSTER_MAX) min_pages = SWAP_CLUSTER_MAX; if (min_pages > 128) min_pages = 128; zone->pages_min = min_pages; } else { /* * If it's a lowmem zone, reserve a number of pages * proportionate to the zone's size. 
*/ zone->pages_min = tmp; } zone->pages_low = zone->pages_min + (tmp >> 2); zone->pages_high = zone->pages_min + (tmp >> 1); spin_unlock_irqrestore(&zone->lru_lock, flags); } /* update totalreserve_pages */ calculate_totalreserve_pages(); } /* * Initialise min_free_kbytes. * * For small machines we want it small (128k min). For large machines * we want it large (64MB max). But it is not linear, because network * bandwidth does not increase linearly with machine size. We use * * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy: * min_free_kbytes = sqrt(lowmem_kbytes * 16) * * which yields * * 16MB: 512k * 32MB: 724k * 64MB: 1024k * 128MB: 1448k * 256MB: 2048k * 512MB: 2896k * 1024MB: 4096k * 2048MB: 5792k * 4096MB: 8192k * 8192MB: 11584k * 16384MB: 16384k */ static int __init init_per_zone_pages_min(void) { unsigned long lowmem_kbytes; lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10); min_free_kbytes = int_sqrt(lowmem_kbytes * 16); if (min_free_kbytes < 128) min_free_kbytes = 128; if (min_free_kbytes > 65536) min_free_kbytes = 65536; setup_per_zone_pages_min(); setup_per_zone_lowmem_reserve(); return 0; } module_init(init_per_zone_pages_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so * that we can call two helper functions whenever min_free_kbytes * changes. 
*/ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, file, buffer, length, ppos); setup_per_zone_pages_min(); return 0; } #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); if (rc) return rc; for_each_zone(zone) zone->min_unmapped_pages = (zone->present_pages * sysctl_min_unmapped_ratio) / 100; return 0; } int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); if (rc) return rc; for_each_zone(zone) zone->min_slab_pages = (zone->present_pages * sysctl_min_slab_ratio) / 100; return 0; } #endif /* * lowmem_reserve_ratio_sysctl_handler - just a wrapper around * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() * whenever sysctl_lowmem_reserve_ratio changes. * * The reserve ratio obviously has absolutely no relation with the * pages_min watermarks. The lowmem reserve ratio can only make sense * if in function of the boot time zone sizes. */ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec_minmax(table, write, file, buffer, length, ppos); setup_per_zone_lowmem_reserve(); return 0; } /* * percpu_pagelist_fraction - changes the pcp->high for each zone on each * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist * can have before it gets flushed back to buddy allocator. 
*/ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; unsigned int cpu; int ret; ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; for_each_zone(zone) { for_each_online_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; setup_pagelist_highmark(zone_pcp(zone, cpu), high); } } return 0; } int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA static int __init set_hashdist(char *str) { if (!str) return 0; hashdist = simple_strtoul(str, &str, 0); return 1; } __setup("hashdist=", set_hashdist); #endif /* * allocate a large system hash table from bootmem * - it is assumed that the hash table must contain an exact power-of-2 * quantity of entries * - limit is the number of hash buckets, not the total allocation size */ void *__init alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, int scale, int flags, unsigned int *_hash_shift, unsigned int *_hash_mask, unsigned long limit) { unsigned long long max = limit; unsigned long log2qty, size; void *table = NULL; /* allow the kernel cmdline to have a say */ if (!numentries) { /* round applicable memory size up to nearest megabyte */ numentries = nr_kernel_pages; numentries += (1UL << (20 - PAGE_SHIFT)) - 1; numentries >>= 20 - PAGE_SHIFT; numentries <<= 20 - PAGE_SHIFT; /* limit to 1 bucket per 2^scale bytes of low memory */ if (scale > PAGE_SHIFT) numentries >>= (scale - PAGE_SHIFT); else numentries <<= (PAGE_SHIFT - scale); } numentries = roundup_pow_of_two(numentries); /* limit allocation size to 1/16 total memory by default */ if (max == 0) { max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4; do_div(max, bucketsize); } if (numentries > max) numentries = max; log2qty = long_log2(numentries); do { size = bucketsize << log2qty; if (flags & HASH_EARLY) table = 
alloc_bootmem(size); else if (hashdist) table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL); else { unsigned long order; for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++) ; table = (void*) __get_free_pages(GFP_ATOMIC, order); } } while (!table && size > PAGE_SIZE && --log2qty); if (!table) panic("Failed to allocate %s hash table\n", tablename); printk("%s hash table entries: %d (order: %d, %lu bytes)\n", tablename, (1U << log2qty), long_log2(size) - PAGE_SHIFT, size); if (_hash_shift) *_hash_shift = log2qty; if (_hash_mask) *_hash_mask = (1 << log2qty) - 1; return table; } #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE struct page *pfn_to_page(unsigned long pfn) { return __pfn_to_page(pfn); } unsigned long page_to_pfn(struct page *page) { return __page_to_pfn(page); } EXPORT_SYMBOL(pfn_to_page); EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ #if MAX_NUMNODES > 1 /* * Find the highest possible node id. */ int highest_possible_node_id(void) { unsigned int node; unsigned int highest = 0; for_each_node_mask(node, node_possible_map) highest = node; return highest; } EXPORT_SYMBOL(highest_possible_node_id); #endif