aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/bpf_helpers_doc.py
blob: 5010a4d5bfbabf68af666e1b43693e8656b12789 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
#!/usr/bin/python3
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright (C) 2018 Netronome Systems, Inc.

# In case user attempts to run with Python 2.
from __future__ import print_function

import argparse
import re
import sys, os

class NoHelperFound(BaseException):
    pass

class ParsingError(BaseException):
    def __init__(self, line='<line not provided>', reader=None):
        if reader:
            BaseException.__init__(self,
                                   'Error at file offset %d, parsing line: %s' %
                                   (reader.tell(), line))
        else:
            BaseException.__init__(self, 'Error parsing line: %s' % line)

class Helper(object):
    """
    An object representing the description of an eBPF helper function.
    @proto: function prototype of the helper function
    @desc: textual description of the helper function
    @ret: description of the return value of the helper function
    """
    def __init__(self, proto='', desc='', ret=''):
        self.proto = proto
        self.desc = desc
        self.ret = ret

    def proto_break_down(self):
        """
        Break down helper function protocol into smaller chunks: return type,
        name, distincts arguments.
        """
        arg_re = re.compile('((const )?(struct )?(\w+|...))( (\**)(\w+))?$')
        res = {}
        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')

        capture = proto_re.match(self.proto)
        res['ret_type'] = capture.group(1)
        res['ret_star'] = capture.group(2)
        res['name']     = capture.group(3)
        res['args'] = []

        args    = capture.group(4).split(', ')
        for a in args:
            capture = arg_re.match(a)
            res['args'].append({
                'type' : capture.group(1),
                'star' : capture.group(6),
                'name' : capture.group(7)
            })

        return res

class HeaderParser(object):
    """
    An object used to parse a file in order to extract the documentation of a
    list of eBPF helper functions. All the helpers that can be retrieved are
    stored as Helper object, in the self.helpers() array.
    @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
               kernel tree
    """
    def __init__(self, filename):
        self.reader = open(filename, 'r')
        self.line = ''
        self.helpers = []

    def parse_helper(self):
        proto    = self.parse_proto()
        desc     = self.parse_desc()
        ret      = self.parse_ret()
        return Helper(proto=proto, desc=desc, ret=ret)

    def parse_proto(self):
        # Argument can be of shape:
        #   - "void"
        #   - "type  name"
        #   - "type *name"
        #   - Same as above, with "const" and/or "struct" in front of type
        #   - "..." (undefined number of arguments, for bpf_trace_printk())
        # There is at least one term ("void"), and at most five arguments.
        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
        capture = p.match(self.line)
        if not capture:
            raise NoHelperFound
        self.line = self.reader.readline()
        return capture.group(1)

    def parse_desc(self):
        p = re.compile(' \* ?(?:\t| {5,8})Description$')
        capture = p.match(self.line)
        if not capture:
            # Helper can have empty description and we might be parsing another
            # attribute: return but do not consume.
            return ''
        # Description can be several lines, some of them possibly empty, and it
        # stops when another subsection title is met.
        desc = ''
        while True:
            self.line = self.reader.readline()
            if self.line == ' *\n':
                desc += '\n'
            else:
                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                capture = p.match(self.line)
                if capture:
                    desc += capture.group(1) + '\n'
                else:
                    break
        return desc

    def parse_ret(self):
        p = re.compile(' \* ?(?:\t| {5,8})Return$')
        capture = p.match(self.line)
        if not capture:
            # Helper can have empty retval and we might be parsing another
            # attribute: return but do not consume.
            return ''
        # Return value description can be several lines, some of them possibly
        # empty, and it stops when another subsection title is met.
        ret = ''
        while True:
            self.line = self.reader.readline()
            if self.line == ' *\n':
                ret += '\n'
            else:
                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                capture = p.match(self.line)
                if capture:
                    ret += capture.group(1) + '\n'
                else:
                    break
        return ret

    def run(self):
        # Advance to start of helper function descriptions.
        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
        if offset == -1:
            raise Exception('Could not find start of eBPF helper descriptions list')
        self.reader.seek(offset)
        self.reader.readline()
        self.reader.readline()
        self.line = self.reader.readline()

        while True:
            try:
                helper = self.parse_helper()
                self.helpers.append(helper)
            except NoHelperFound:
                break

        self.reader.close()
        print('Parsed description of %d helper function(s)' % len(self.helpers),
              file=sys.stderr)

###############################################################################

class Printer(object):
    """
    A generic class for printers. Printers should be created with an array of
    Helper objects, and implement a way to print them in the desired fashion.
    @helpers: array of Helper objects to print to standard output
    """
    def __init__(self, helpers):
        self.helpers = helpers

    def print_header(self):
        pass

    def print_footer(self):
        pass

    def print_one(self, helper):
        pass

    def print_all(self):
        self.print_header()
        for helper in self.helpers:
            self.print_one(helper)
        self.print_footer()

class PrinterRST(Printer):
    """
    A printer for dumping collected information about helpers as a ReStructured
    Text page compatible with the rst2man program, which can be used to
    generate a manual page for the helpers.
    @helpers: array of Helper objects to print to standard output
    """
    def print_header(self):
        header = '''\
.. Copyright (C) All BPF authors and contributors from 2014 to present.
.. See git log include/uapi/linux/bpf.h in kernel tree for details.
.. 
.. %%%LICENSE_START(VERBATIM)
.. Permission is granted to make and distribute verbatim copies of this
.. manual provided the copyright notice and this permission notice are
.. preserved on all copies.
.. 
.. Permission is granted to copy and distribute modified versions of this
.. manual under the conditions for verbatim copying, provided that the
.. entire resulting derived work is distributed under the terms of a
.. permission notice identical to this one.
.. 
.. Since the Linux kernel and libraries are constantly changing, this
.. manual page may be incorrect or out-of-date.  The author(s) assume no
.. responsibility for errors or omissions, or for damages resulting from
.. the use of the information contained herein.  The author(s) may not
.. have taken the same level of care in the production of this manual,
.. which is licensed free of charge, as they might when working
.. professionally.
.. 
.. Formatted or processed versions of this manual, if unaccompanied by
.. the source, must acknowledge the copyright and authors of this work.
.. %%%LICENSE_END
.. 
.. Please do not edit this file. It was generated from the documentation
.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
.. repository (header and footer).

===========
BPF-HELPERS
===========
-------------------------------------------------------------------------------
list of eBPF helper functions
-------------------------------------------------------------------------------

:Manual section: 7

DESCRIPTION
===========

The extended Berkeley Packet Filter (eBPF) subsystem consists in programs
written in a pseudo-assembly language, then attached to one of the several
kernel hooks and run in reaction of specific events. This framework differs
from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
the ability to call special functions (or "helpers") from within a program.
These functions are restricted to a white-list of helpers defined in the
kernel.

These helpers are used by eBPF programs to interact with the system, or with
the context in which they work. For instance, they can be used to print
debugging messages, to get the time since the system was booted, to interact
with eBPF maps, or to manipulate network packets. Since there are several eBPF
program types, and that they do not run in the same context, each program type
can only call a subset of those helpers.

Due to eBPF conventions, a helper can not have more than five arguments.

Internally, eBPF programs call directly into the compiled helper functions
without requiring any foreign-function interface. As a result, calling helpers
introduces no overhead, thus offering excellent performance.

This document is an attempt to list and document the helpers available to eBPF
developers. They are sorted by chronological order (the oldest helpers in the
kernel at the top).

HELPERS
=======
'''
        print(header)

    def print_footer(self):
        footer = '''
EXAMPLES
========

Example usage for most of the eBPF helpers listed in this manual page are
available within the Linux kernel sources, at the following locations:

* *samples/bpf/*
* *tools/testing/selftests/bpf/*

LICENSE
=======

eBPF programs can have an associated license, passed along with the bytecode
instructions to the kernel when the programs are loaded. The format for that
string is identical to the one in use for kernel modules (Dual licenses, such
as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
programs that are compatible with the GNU Privacy License (GPL).

In order to use such helpers, the eBPF program must be loaded with the correct
license string passed (via **attr**) to the **bpf**\ () system call, and this
generally translates into the C source code of the program containing a line
similar to the following:

::

	char ____license[] __attribute__((section("license"), used)) = "GPL";

IMPLEMENTATION
==============

This manual page is an effort to document the existing eBPF helper functions.
But as of this writing, the BPF sub-system is under heavy development. New eBPF
program or map types are added, along with new helper functions. Some helpers
are occasionally made available for additional program types. So in spite of
the efforts of the community, this page might not be up-to-date. If you want to
check by yourself what helper functions exist in your kernel, or what types of
programs they can support, here are some files among the kernel tree that you
may be interested in:

* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
  of all helper functions, as well as many other BPF definitions including most
  of the flags, structs or constants used by the helpers.
* *net/core/filter.c* contains the definition of most network-related helper
  functions, and the list of program types from which they can be used.
* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
  helpers.
* *kernel/bpf/verifier.c* contains the functions used to check that valid types
  of eBPF maps are used with a given helper function.
* *kernel/bpf/* directory contains other files in which additional helpers are
  defined (for cgroups, sockmaps, etc.).

Compatibility between helper functions and program types can generally be found
in the files where helper functions are defined. Look for the **struct
bpf_func_proto** objects and for functions returning them: these functions
contain a list of helpers that a given program type can call. Note that the
**default:** label of the **switch ... case** used to filter helpers can call
other functions, themselves allowing access to additional helpers. The
requirement for GPL license is also in those **struct bpf_func_proto**.

Compatibility between helper functions and map types can be found in the
**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.

Helper functions that invalidate the checks on **data** and **data_end**
pointers for network processing are listed in function
**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.

SEE ALSO
========

**bpf**\ (2),
**cgroups**\ (7),
**ip**\ (8),
**perf_event_open**\ (2),
**sendmsg**\ (2),
**socket**\ (7),
**tc-bpf**\ (8)'''
        print(footer)

    def print_proto(self, helper):
        """
        Format function protocol with bold and italics markers. This makes RST
        file less readable, but gives nice results in the manual page.
        """
        proto = helper.proto_break_down()

        print('**%s %s%s(' % (proto['ret_type'],
                              proto['ret_star'].replace('*', '\\*'),
                              proto['name']),
              end='')

        comma = ''
        for a in proto['args']:
            one_arg = '{}{}'.format(comma, a['type'])
            if a['name']:
                if a['star']:
                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
                else:
                    one_arg += '** '
                one_arg += '*{}*\\ **'.format(a['name'])
            comma = ', '
            print(one_arg, end='')

        print(')**')

    def print_one(self, helper):
        self.print_proto(helper)

        if (helper.desc):
            print('\tDescription')
            # Do not strip all newline characters: formatted code at the end of
            # a section must be followed by a blank line.
            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
                print('{}{}'.format('\t\t' if line else '', line))

        if (helper.ret):
            print('\tReturn')
            for line in helper.ret.rstrip().split('\n'):
                print('{}{}'.format('\t\t' if line else '', line))

        print('')

###############################################################################

# If script is launched from scripts/ from kernel tree and can access
# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
# otherwise the --filename argument will be required from the command line.
script = os.path.abspath(sys.argv[0])
linuxRoot = os.path.dirname(os.path.dirname(script))
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')

argParser = argparse.ArgumentParser(description="""
Parse eBPF header file and generate documentation for eBPF helper functions.
The RST-formatted output produced can be turned into a manual page with the
rst2man utility.
""")
if (os.path.isfile(bpfh)):
    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
                           default=bpfh)
else:
    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
args = argParser.parse_args()

# Parse file.
headerParser = HeaderParser(args.filename)
headerParser.run()

# Print formatted output to standard output.
printer = PrinterRST(headerParser.helpers)
printer.print_all()
79 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033
/* via-rhine.c: A Linux Ethernet device driver for VIA Rhine family chips. */
/*
	Written 1998-2001 by Donald Becker.

	Current Maintainer: Roger Luethi <rl@hellgate.ch>

	This software may be used and distributed according to the terms of
	the GNU General Public License (GPL), incorporated herein by reference.
	Drivers based on or derived from this code fall under the GPL and must
	retain the authorship, copyright and license notice.  This file is not
	a complete program and may only be used when the entire operating
	system is licensed under the GPL.

	This driver is designed for the VIA VT86C100A Rhine-I.
	It also works with the Rhine-II (6102) and Rhine-III (6105/6105L/6105LOM
	and management NIC 6105M).

	The author may be reached as becker@scyld.com, or C/O
	Scyld Computing Corporation
	410 Severn Ave., Suite 210
	Annapolis MD 21403


	This driver contains some changes from the original Donald Becker
	version. He may or may not be interested in bug reports on this
	code. You can find his versions at:
	http://www.scyld.com/network/via-rhine.html
	[link no longer provides useful info -jgarzik]

*/

#define DRV_NAME	"via-rhine"
#define DRV_VERSION	"1.4.3"
#define DRV_RELDATE	"2007-03-06"


/* A few user-configurable values.
   These may be modified when a driver module is loaded. */

static int debug = 1;	/* 1 normal messages, 0 quiet .. 7 verbose. */
static int max_interrupt_work = 20;

/* Set the copy breakpoint for the copy-only-tiny-frames scheme.
   Setting to > 1518 effectively disables this feature. */
#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
       || defined(CONFIG_SPARC) || defined(__ia64__) \
       || defined(__sh__) || defined(__mips__)
static int rx_copybreak = 1518;
#else
static int rx_copybreak;
#endif

/* Work-around for broken BIOSes: they are unable to get the chip back out of
   power state D3 so PXE booting fails. bootparam(7): via-rhine.avoid_D3=1 */
static int avoid_D3;

/*
 * In case you are looking for 'options[]' or 'full_duplex[]', they
 * are gone. Use ethtool(8) instead.
 */

/* Maximum number of multicast addresses to filter (vs. rx-all-multicast).
   The Rhine has a 64 element 8390-like hash table. */
static const int multicast_filter_limit = 32;


/* Operational parameters that are set at compile time. */

/* Keep the ring sizes a power of two for compile efficiency.
   The compiler will convert <unsigned>'%'<2^N> into a bit mask.
   Making the Tx ring too large decreases the effectiveness of channel
   bonding and packet priority.
   There are no ill effects from too-large receive rings. */
#define TX_RING_SIZE	16
#define TX_QUEUE_LEN	10	/* Limit ring entries actually used. */
#define RX_RING_SIZE	64

/* Operational parameters that usually are not changed. */

/* Time in jiffies before concluding the transmitter is hung. */
#define TX_TIMEOUT	(2*HZ)

#define PKT_BUF_SZ	1536	/* Size of each temporary Rx buffer.*/

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/crc32.h>
#include <linux/bitops.h>
#include <asm/processor.h>	/* Processor type for cache alignment. */
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/uaccess.h>
#include <linux/dmi.h>

/* These identify the driver base version and may not be removed. */
static const char version[] __devinitconst =
	KERN_INFO DRV_NAME ".c:v1.10-LK" DRV_VERSION " " DRV_RELDATE
	" Written by Donald Becker\n";

/* This driver was written to use PCI memory space. Some early versions
   of the Rhine may only work correctly with I/O space accesses. */
#ifdef CONFIG_VIA_RHINE_MMIO
#define USE_MMIO
#else
#endif

MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
MODULE_LICENSE("GPL");

module_param(max_interrupt_work, int, 0);
module_param(debug, int, 0);
module_param(rx_copybreak, int, 0);
module_param(avoid_D3, bool, 0);
MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt");
MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)");
MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames");
MODULE_PARM_DESC(avoid_D3, "Avoid power state D3 (work-around for broken BIOSes)");

/*
		Theory of Operation

I. Board Compatibility

This driver is designed for the VIA 86c100A Rhine-II PCI Fast Ethernet
controller.

II. Board-specific settings

Boards with this chip are functional only in a bus-master PCI slot.

Many operational settings are loaded from the EEPROM to the Config word at
offset 0x78. For most of these settings, this driver assumes that they are
correct.
If this driver is compiled to use PCI memory space operations the EEPROM
must be configured to enable memory ops.

III. Driver operation

IIIa. Ring buffers

This driver uses two statically allocated fixed-size descriptor lists
formed into rings by a branch from the final descriptor to the beginning of
the list. The ring sizes are set at compile time by RX/TX_RING_SIZE.

IIIb/c. Transmit/Receive Structure

This driver attempts to use a zero-copy receive and transmit scheme.

Alas, all data buffers are required to start on a 32 bit boundary, so
the driver must often copy transmit packets into bounce buffers.

The driver allocates full frame size skbuffs for the Rx ring buffers at
open() time and passes the skb->data field to the chip as receive data
buffers. When an incoming frame is less than RX_COPYBREAK bytes long,
a fresh skbuff is allocated and the frame is copied to the new skbuff.
When the incoming frame is larger, the skbuff is passed directly up the
protocol stack. Buffers consumed this way are replaced by newly allocated
skbuffs in the last phase of rhine_rx().

The RX_COPYBREAK value is chosen to trade-off the memory wasted by
using a full-sized skbuff for small frames vs. the copying costs of larger
frames. New boards are typically used in generously configured machines
and the underfilled buffers have negligible impact compared to the benefit of
a single allocation size, so the default value of zero results in never
copying packets. When copying is done, the cost is usually mitigated by using
a combined copy/checksum routine. Copying also preloads the cache, which is
most useful with small frames.

Since the VIA chips are only able to transfer data to buffers on 32 bit
boundaries, the IP header at offset 14 in an ethernet frame isn't
longword aligned for further processing. Copying these unaligned buffers
has the beneficial effect of 16-byte aligning the IP header.

IIId. Synchronization

The driver runs as two independent, single-threaded flows of control. One
is the send-packet routine, which enforces single-threaded use by the
netdev_priv(dev)->lock spinlock. The other thread is the interrupt handler,
which is single threaded by the hardware and interrupt handling software.

The send packet thread has partial control over the Tx ring. It locks the
netdev_priv(dev)->lock whenever it's queuing a Tx packet. If the next slot in
the ring is not available it stops the transmit queue by
calling netif_stop_queue.

The interrupt handler has exclusive control over the Rx ring and records stats
from the Tx ring. After reaping the stats, it marks the Tx queue entry as
empty by incrementing the dirty_tx mark. If at least half of the entries in
the Rx ring are available the transmit queue is woken up if it was stopped.

IV. Notes

IVb. References

Preliminary VT86C100A manual from http://www.via.com.tw/
http://www.scyld.com/expert/100mbps.html
http://www.scyld.com/expert/NWay.html
ftp://ftp.via.com.tw/public/lan/Products/NIC/VT86C100A/Datasheet/VT86C100A03.pdf
ftp://ftp.via.com.tw/public/lan/Products/NIC/VT6102/Datasheet/VT6102_021.PDF


IVc. Errata

The VT86C100A manual is not reliable information.
The 3043 chip does not handle unaligned transmit or receive buffers, resulting
in significant performance degradation for bounce buffer copies on transmit
and unaligned IP headers on receive.
The chip does not pad to minimum transmit length.

*/


/* This table drives the PCI probe routines. It's mostly boilerplate in all
   of the drivers, and will likely be provided by some future kernel.
   Note the matching code -- the first table entry matchs all 56** cards but
   second only the 1234 card.
*/

enum rhine_revs {
	VT86C100A	= 0x00,
	VTunknown0	= 0x20,
	VT6102		= 0x40,
	VT8231		= 0x50,	/* Integrated MAC */
	VT8233		= 0x60,	/* Integrated MAC */
	VT8235		= 0x74,	/* Integrated MAC */
	VT8237		= 0x78,	/* Integrated MAC */
	VTunknown1	= 0x7C,
	VT6105		= 0x80,
	VT6105_B0	= 0x83,
	VT6105L		= 0x8A,
	VT6107		= 0x8C,
	VTunknown2	= 0x8E,
	VT6105M		= 0x90,	/* Management adapter */
};

enum rhine_quirks {
	rqWOL		= 0x0001,	/* Wake-On-LAN support */
	rqForceReset	= 0x0002,
	rq6patterns	= 0x0040,	/* 6 instead of 4 patterns for WOL */
	rqStatusWBRace	= 0x0080,	/* Tx Status Writeback Error possible */
	rqRhineI	= 0x0100,	/* See comment below */
};
/*
 * rqRhineI: VT86C100A (aka Rhine-I) uses different bits to enable
 * MMIO as well as for the collision counter and the Tx FIFO underflow
 * indicator. In addition, Tx and Rx buffers need to 4 byte aligned.
 */

/* Beware of PCI posted writes */
#define IOSYNC	do { ioread8(ioaddr + StationAddr); } while (0)

static const struct pci_device_id rhine_pci_tbl[] = {
	{ 0x1106, 0x3043, PCI_ANY_ID, PCI_ANY_ID, },	/* VT86C100A */
	{ 0x1106, 0x3065, PCI_ANY_ID, PCI_ANY_ID, },	/* VT6102 */
	{ 0x1106, 0x3106, PCI_ANY_ID, PCI_ANY_ID, },	/* 6105{,L,LOM} */
	{ 0x1106, 0x3053, PCI_ANY_ID, PCI_ANY_ID, },	/* VT6105M */
	{ }	/* terminate list */
};
MODULE_DEVICE_TABLE(pci, rhine_pci_tbl);


/* Offsets to the device registers. */
enum register_offsets {
	StationAddr=0x00, RxConfig=0x06, TxConfig=0x07, ChipCmd=0x08,
	ChipCmd1=0x09,
	IntrStatus=0x0C, IntrEnable=0x0E,
	MulticastFilter0=0x10, MulticastFilter1=0x14,
	RxRingPtr=0x18, TxRingPtr=0x1C, GFIFOTest=0x54,
	MIIPhyAddr=0x6C, MIIStatus=0x6D, PCIBusConfig=0x6E,
	MIICmd=0x70, MIIRegAddr=0x71, MIIData=0x72, MACRegEEcsr=0x74,
	ConfigA=0x78, ConfigB=0x79, ConfigC=0x7A, ConfigD=0x7B,
	RxMissed=0x7C, RxCRCErrs=0x7E, MiscCmd=0x81,
	StickyHW=0x83, IntrStatus2=0x84,
	WOLcrSet=0xA0, PwcfgSet=0xA1, WOLcgSet=0xA3, WOLcrClr=0xA4,
	WOLcrClr1=0xA6, WOLcgClr=0xA7,
	PwrcsrSet=0xA8, PwrcsrSet1=0xA9, PwrcsrClr=0xAC, PwrcsrClr1=0xAD,
};

/* Bits in ConfigD */
enum backoff_bits {
	BackOptional=0x01, BackModify=0x02,
	BackCaptureEffect=0x04, BackRandom=0x08
};

#ifdef USE_MMIO
/* Registers we check that mmio and reg are the same. */
static const int mmio_verify_registers[] = {
	RxConfig, TxConfig, IntrEnable, ConfigA, ConfigB, ConfigC, ConfigD,
	0
};
#endif

/* Bits in the interrupt status/mask registers. */
enum intr_status_bits {
	IntrRxDone=0x0001, IntrRxErr=0x0004, IntrRxEmpty=0x0020,
	IntrTxDone=0x0002, IntrTxError=0x0008, IntrTxUnderrun=0x0210,
	IntrPCIErr=0x0040,
	IntrStatsMax=0x0080, IntrRxEarly=0x0100,
	IntrRxOverflow=0x0400, IntrRxDropped=0x0800, IntrRxNoBuf=0x1000,
	IntrTxAborted=0x2000, IntrLinkChange=0x4000,
	IntrRxWakeUp=0x8000,
	IntrNormalSummary=0x0003, IntrAbnormalSummary=0xC260,
	IntrTxDescRace=0x080000,	/* mapped from IntrStatus2 */
	IntrTxErrSummary=0x082218,
};

/* Bits in WOLcrSet/WOLcrClr and PwrcsrSet/PwrcsrClr */
enum wol_bits {
	WOLucast	= 0x10,
	WOLmagic	= 0x20,
	WOLbmcast	= 0x30,
	WOLlnkon	= 0x40,
	WOLlnkoff	= 0x80,
};

/* The Rx and Tx buffer descriptors. */
struct rx_desc {
	__le32 rx_status;
	__le32 desc_length; /* Chain flag, Buffer/frame length */
	__le32 addr;
	__le32 next_desc;
};
struct tx_desc {
	__le32 tx_status;
	__le32 desc_length; /* Chain flag, Tx Config, Frame length */
	__le32 addr;
	__le32 next_desc;
};

/* Initial value for tx_desc.desc_length, Buffer size goes to bits 0-10 */
#define TXDESC		0x00e08000

enum rx_status_bits {
	RxOK=0x8000, RxWholePkt=0x0300, RxErr=0x008F
};

/* Bits in *_desc.*_status */
enum desc_status_bits {
	DescOwn=0x80000000
};

/* Bits in ChipCmd. */
enum chip_cmd_bits {
	CmdInit=0x01, CmdStart=0x02, CmdStop=0x04, CmdRxOn=0x08,
	CmdTxOn=0x10, Cmd1TxDemand=0x20, CmdRxDemand=0x40,
	Cmd1EarlyRx=0x01, Cmd1EarlyTx=0x02, Cmd1FDuplex=0x04,
	Cmd1NoTxPoll=0x08, Cmd1Reset=0x80,
};

struct rhine_private {
	/* Descriptor rings */
	struct rx_desc *rx_ring;
	struct tx_desc *tx_ring;
	dma_addr_t rx_ring_dma;
	dma_addr_t tx_ring_dma;

	/* The addresses of receive-in-place skbuffs. */
	struct sk_buff *rx_skbuff[RX_RING_SIZE];
	dma_addr_t rx_skbuff_dma[RX_RING_SIZE];

	/* The saved address of a sent-in-place packet/buffer, for later free(). */
	struct sk_buff *tx_skbuff[TX_RING_SIZE];
	dma_addr_t tx_skbuff_dma[TX_RING_SIZE];

	/* Tx bounce buffers (Rhine-I only) */
	unsigned char *tx_buf[TX_RING_SIZE];
	unsigned char *tx_bufs;
	dma_addr_t tx_bufs_dma;

	struct pci_dev *pdev;
	long pioaddr;
	struct net_device *dev;
	struct napi_struct napi;
	struct net_device_stats stats;
	spinlock_t lock;

	/* Frequently used values: keep some adjacent for cache effect. */
	u32 quirks;
	struct rx_desc *rx_head_desc;
	unsigned int cur_rx, dirty_rx;	/* Producer/consumer ring indices */
	unsigned int cur_tx, dirty_tx;
	unsigned int rx_buf_sz;		/* Based on MTU+slack. */
	u8 wolopts;

	u8 tx_thresh, rx_thresh;

	struct mii_if_info mii_if;
	void __iomem *base;
};

static int  mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
static int  rhine_open(struct net_device *dev);
static void rhine_tx_timeout(struct net_device *dev);
static int  rhine_start_tx(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t rhine_interrupt(int irq, void *dev_instance);
static void rhine_tx(struct net_device *dev);
static int rhine_rx(struct net_device *dev, int limit);
static void rhine_error(struct net_device *dev, int intr_status);
static void rhine_set_rx_mode(struct net_device *dev);
static struct net_device_stats *rhine_get_stats(struct net_device *dev);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static const struct ethtool_ops netdev_ethtool_ops;
static int  rhine_close(struct net_device *dev);
static void rhine_shutdown (struct pci_dev *pdev);

#define RHINE_WAIT_FOR(condition) do {					\
	int i=1024;							\
	while (!(condition) && --i)					\
		;							\
	if (debug > 1 && i < 512)					\
		printk(KERN_INFO "%s: %4d cycles used @ %s:%d\n",	\
				DRV_NAME, 1024-i, __func__, __LINE__);	\
} while(0)

static inline u32 get_intr_status(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	u32 intr_status;

	intr_status = ioread16(ioaddr + IntrStatus);
	/* On Rhine-II, Bit 3 indicates Tx descriptor write-back race. */
	if (rp->quirks & rqStatusWBRace)
		intr_status |= ioread8(ioaddr + IntrStatus2) << 16;
	return intr_status;
}

/*
 * Get power related registers into sane state.
 * Notify user about past WOL event.
 */
static void rhine_power_init(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	u16 wolstat;

	if (rp->quirks & rqWOL) {
		/* Make sure chip is in power state D0 */
		iowrite8(ioread8(ioaddr + StickyHW) & 0xFC, ioaddr + StickyHW);

		/* Disable "force PME-enable" */
		iowrite8(0x80, ioaddr + WOLcgClr);

		/* Clear power-event config bits (WOL) */
		iowrite8(0xFF, ioaddr + WOLcrClr);
		/* More recent cards can manage two additional patterns */
		if (rp->quirks & rq6patterns)
			iowrite8(0x03, ioaddr + WOLcrClr1);

		/* Save power-event status bits */
		wolstat = ioread8(ioaddr + PwrcsrSet);
		if (rp->quirks & rq6patterns)
			wolstat |= (ioread8(ioaddr + PwrcsrSet1) & 0x03) << 8;

		/* Clear power-event status bits */
		iowrite8(0xFF, ioaddr + PwrcsrClr);
		if (rp->quirks & rq6patterns)
			iowrite8(0x03, ioaddr + PwrcsrClr1);

		if (wolstat) {
			char *reason;
			switch (wolstat) {
			case WOLmagic:
				reason = "Magic packet";
				break;
			case WOLlnkon:
				reason = "Link went up";
				break;
			case WOLlnkoff:
				reason = "Link went down";
				break;
			case WOLucast:
				reason = "Unicast packet";
				break;
			case WOLbmcast:
				reason = "Multicast/broadcast packet";
				break;
			default:
				reason = "Unknown";
			}
			printk(KERN_INFO "%s: Woke system up. Reason: %s.\n",
			       DRV_NAME, reason);
		}
	}
}

static void rhine_chip_reset(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	iowrite8(Cmd1Reset, ioaddr + ChipCmd1);
	IOSYNC;

	if (ioread8(ioaddr + ChipCmd1) & Cmd1Reset) {
		printk(KERN_INFO "%s: Reset not complete yet. "
			"Trying harder.\n", DRV_NAME);

		/* Force reset */
		if (rp->quirks & rqForceReset)
			iowrite8(0x40, ioaddr + MiscCmd);

		/* Reset can take somewhat longer (rare) */
		RHINE_WAIT_FOR(!(ioread8(ioaddr + ChipCmd1) & Cmd1Reset));
	}

	if (debug > 1)
		printk(KERN_INFO "%s: Reset %s.\n", dev->name,
			(ioread8(ioaddr + ChipCmd1) & Cmd1Reset) ?
			"failed" : "succeeded");
}

#ifdef USE_MMIO
static void enable_mmio(long pioaddr, u32 quirks)
{
	int n;
	if (quirks & rqRhineI) {
		/* More recent docs say that this bit is reserved ... */
		n = inb(pioaddr + ConfigA) | 0x20;
		outb(n, pioaddr + ConfigA);
	} else {
		n = inb(pioaddr + ConfigD) | 0x80;
		outb(n, pioaddr + ConfigD);
	}
}
#endif

/*
 * Loads bytes 0x00-0x05, 0x6E-0x6F, 0x78-0x7B from EEPROM
 * (plus 0x6C for Rhine-I/II)
 */
static void __devinit rhine_reload_eeprom(long pioaddr, struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	outb(0x20, pioaddr + MACRegEEcsr);
	RHINE_WAIT_FOR(!(inb(pioaddr + MACRegEEcsr) & 0x20));

#ifdef USE_MMIO
	/*
	 * Reloading from EEPROM overwrites ConfigA-D, so we must re-enable
	 * MMIO. If reloading EEPROM was done first this could be avoided, but
	 * it is not known if that still works with the "win98-reboot" problem.
	 */
	enable_mmio(pioaddr, rp->quirks);
#endif

	/* Turn off EEPROM-controlled wake-up (magic packet) */
	if (rp->quirks & rqWOL)
		iowrite8(ioread8(ioaddr + ConfigA) & 0xFC, ioaddr + ConfigA);

}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void rhine_poll(struct net_device *dev)
{
	disable_irq(dev->irq);
	rhine_interrupt(dev->irq, (void *)dev);
	enable_irq(dev->irq);
}
#endif

static int rhine_napipoll(struct napi_struct *napi, int budget)
{
	struct rhine_private *rp = container_of(napi, struct rhine_private, napi);
	struct net_device *dev = rp->dev;
	void __iomem *ioaddr = rp->base;
	int work_done;

	work_done = rhine_rx(dev, budget);

	if (work_done < budget) {
		napi_complete(napi);

		iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
			  IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
			  IntrTxDone | IntrTxError | IntrTxUnderrun |
			  IntrPCIErr | IntrStatsMax | IntrLinkChange,
			  ioaddr + IntrEnable);
	}
	return work_done;
}

static void __devinit rhine_hw_init(struct net_device *dev, long pioaddr)
{
	struct rhine_private *rp = netdev_priv(dev);

	/* Reset the chip to erase previous misconfiguration. */
	rhine_chip_reset(dev);

	/* Rhine-I needs extra time to recuperate before EEPROM reload */
	if (rp->quirks & rqRhineI)
		msleep(5);

	/* Reload EEPROM controlled bytes cleared by soft reset */
	rhine_reload_eeprom(pioaddr, dev);
}

static const struct net_device_ops rhine_netdev_ops = {
	.ndo_open		 = rhine_open,
	.ndo_stop		 = rhine_close,
	.ndo_start_xmit		 = rhine_start_tx,
	.ndo_get_stats		 = rhine_get_stats,
	.ndo_set_multicast_list	 = rhine_set_rx_mode,
	.ndo_validate_addr	 = eth_validate_addr,
	.ndo_set_mac_address 	 = eth_mac_addr,
	.ndo_do_ioctl		 = netdev_ioctl,
	.ndo_tx_timeout 	 = rhine_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	 = rhine_poll,
#endif
};

static int __devinit rhine_init_one(struct pci_dev *pdev,
				    const struct pci_device_id *ent)
{
	struct net_device *dev;
	struct rhine_private *rp;
	int i, rc;
	u32 quirks;
	long pioaddr;
	long memaddr;
	void __iomem *ioaddr;
	int io_size, phy_id;
	const char *name;
#ifdef USE_MMIO
	int bar = 1;
#else
	int bar = 0;
#endif

/* when built into the kernel, we only print version if device is found */
#ifndef MODULE
	static int printed_version;
	if (!printed_version++)
		printk(version);
#endif

	io_size = 256;
	phy_id = 0;
	quirks = 0;
	name = "Rhine";
	if (pdev->revision < VTunknown0) {
		quirks = rqRhineI;
		io_size = 128;
	}
	else if (pdev->revision >= VT6102) {
		quirks = rqWOL | rqForceReset;
		if (pdev->revision < VT6105) {
			name = "Rhine II";
			quirks |= rqStatusWBRace;	/* Rhine-II exclusive */
		}
		else {
			phy_id = 1;	/* Integrated PHY, phy_id fixed to 1 */
			if (pdev->revision >= VT6105_B0)
				quirks |= rq6patterns;
			if (pdev->revision < VT6105M)
				name = "Rhine III";
			else
				name = "Rhine III (Management Adapter)";
		}
	}

	rc = pci_enable_device(pdev);
	if (rc)
		goto err_out;

	/* this should always be supported */
	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
	if (rc) {
		printk(KERN_ERR "32-bit PCI DMA addresses not supported by "
		       "the card!?\n");
		goto err_out;
	}

	/* sanity check */
	if ((pci_resource_len(pdev, 0) < io_size) ||
	    (pci_resource_len(pdev, 1) < io_size)) {
		rc = -EIO;
		printk(KERN_ERR "Insufficient PCI resources, aborting\n");
		goto err_out;
	}

	pioaddr = pci_resource_start(pdev, 0);
	memaddr = pci_resource_start(pdev, 1);

	pci_set_master(pdev);

	dev = alloc_etherdev(sizeof(struct rhine_private));
	if (!dev) {
		rc = -ENOMEM;
		printk(KERN_ERR "alloc_etherdev failed\n");
		goto err_out;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);

	rp = netdev_priv(dev);
	rp->dev = dev;
	rp->quirks = quirks;
	rp->pioaddr = pioaddr;
	rp->pdev = pdev;

	rc = pci_request_regions(pdev, DRV_NAME);
	if (rc)
		goto err_out_free_netdev;

	ioaddr = pci_iomap(pdev, bar, io_size);
	if (!ioaddr) {
		rc = -EIO;
		printk(KERN_ERR "ioremap failed for device %s, region 0x%X "
		       "@ 0x%lX\n", pci_name(pdev), io_size, memaddr);
		goto err_out_free_res;
	}

#ifdef USE_MMIO
	enable_mmio(pioaddr, quirks);

	/* Check that selected MMIO registers match the PIO ones */
	i = 0;
	while (mmio_verify_registers[i]) {
		int reg = mmio_verify_registers[i++];
		unsigned char a = inb(pioaddr+reg);
		unsigned char b = readb(ioaddr+reg);
		if (a != b) {
			rc = -EIO;
			printk(KERN_ERR "MMIO do not match PIO [%02x] "
			       "(%02x != %02x)\n", reg, a, b);
			goto err_out_unmap;
		}
	}
#endif /* USE_MMIO */

	dev->base_addr = (unsigned long)ioaddr;
	rp->base = ioaddr;

	/* Get chip registers into a sane state */
	rhine_power_init(dev);
	rhine_hw_init(dev, pioaddr);

	for (i = 0; i < 6; i++)
		dev->dev_addr[i] = ioread8(ioaddr + StationAddr + i);
	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

	if (!is_valid_ether_addr(dev->perm_addr)) {
		rc = -EIO;
		printk(KERN_ERR "Invalid MAC address\n");
		goto err_out_unmap;
	}

	/* For Rhine-I/II, phy_id is loaded from EEPROM */
	if (!phy_id)
		phy_id = ioread8(ioaddr + 0x6C);

	dev->irq = pdev->irq;

	spin_lock_init(&rp->lock);
	rp->mii_if.dev = dev;
	rp->mii_if.mdio_read = mdio_read;
	rp->mii_if.mdio_write = mdio_write;
	rp->mii_if.phy_id_mask = 0x1f;
	rp->mii_if.reg_num_mask = 0x1f;

	/* The chip-specific entries in the device structure. */
	dev->netdev_ops = &rhine_netdev_ops;
	dev->ethtool_ops = &netdev_ethtool_ops,
	dev->watchdog_timeo = TX_TIMEOUT;

	netif_napi_add(dev, &rp->napi, rhine_napipoll, 64);

	if (rp->quirks & rqRhineI)
		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;

	/* dev->name not defined before register_netdev()! */
	rc = register_netdev(dev);
	if (rc)
		goto err_out_unmap;

	printk(KERN_INFO "%s: VIA %s at 0x%lx, %pM, IRQ %d.\n",
	       dev->name, name,
#ifdef USE_MMIO
	       memaddr,
#else
	       (long)ioaddr,
#endif
	       dev->dev_addr, pdev->irq);

	pci_set_drvdata(pdev, dev);

	{
		u16 mii_cmd;
		int mii_status = mdio_read(dev, phy_id, 1);
		mii_cmd = mdio_read(dev, phy_id, MII_BMCR) & ~BMCR_ISOLATE;
		mdio_write(dev, phy_id, MII_BMCR, mii_cmd);
		if (mii_status != 0xffff && mii_status != 0x0000) {
			rp->mii_if.advertising = mdio_read(dev, phy_id, 4);
			printk(KERN_INFO "%s: MII PHY found at address "
			       "%d, status 0x%4.4x advertising %4.4x "
			       "Link %4.4x.\n", dev->name, phy_id,
			       mii_status, rp->mii_if.advertising,
			       mdio_read(dev, phy_id, 5));

			/* set IFF_RUNNING */
			if (mii_status & BMSR_LSTATUS)
				netif_carrier_on(dev);
			else
				netif_carrier_off(dev);

		}
	}
	rp->mii_if.phy_id = phy_id;
	if (debug > 1 && avoid_D3)
		printk(KERN_INFO "%s: No D3 power state at shutdown.\n",
		       dev->name);

	return 0;

err_out_unmap:
	pci_iounmap(pdev, ioaddr);
err_out_free_res:
	pci_release_regions(pdev);
err_out_free_netdev:
	free_netdev(dev);
err_out:
	return rc;
}

static int alloc_ring(struct net_device* dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void *ring;
	dma_addr_t ring_dma;

	ring = pci_alloc_consistent(rp->pdev,
				    RX_RING_SIZE * sizeof(struct rx_desc) +
				    TX_RING_SIZE * sizeof(struct tx_desc),
				    &ring_dma);
	if (!ring) {
		printk(KERN_ERR "Could not allocate DMA memory.\n");
		return -ENOMEM;
	}
	if (rp->quirks & rqRhineI) {
		rp->tx_bufs = pci_alloc_consistent(rp->pdev,
						   PKT_BUF_SZ * TX_RING_SIZE,
						   &rp->tx_bufs_dma);
		if (rp->tx_bufs == NULL) {
			pci_free_consistent(rp->pdev,
				    RX_RING_SIZE * sizeof(struct rx_desc) +
				    TX_RING_SIZE * sizeof(struct tx_desc),
				    ring, ring_dma);
			return -ENOMEM;
		}
	}

	rp->rx_ring = ring;
	rp->tx_ring = ring + RX_RING_SIZE * sizeof(struct rx_desc);
	rp->rx_ring_dma = ring_dma;
	rp->tx_ring_dma = ring_dma + RX_RING_SIZE * sizeof(struct rx_desc);

	return 0;
}

static void free_ring(struct net_device* dev)
{
	struct rhine_private *rp = netdev_priv(dev);

	pci_free_consistent(rp->pdev,
			    RX_RING_SIZE * sizeof(struct rx_desc) +
			    TX_RING_SIZE * sizeof(struct tx_desc),
			    rp->rx_ring, rp->rx_ring_dma);
	rp->tx_ring = NULL;

	if (rp->tx_bufs)
		pci_free_consistent(rp->pdev, PKT_BUF_SZ * TX_RING_SIZE,
				    rp->tx_bufs, rp->tx_bufs_dma);

	rp->tx_bufs = NULL;

}

static void alloc_rbufs(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	dma_addr_t next;
	int i;

	rp->dirty_rx = rp->cur_rx = 0;

	rp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
	rp->rx_head_desc = &rp->rx_ring[0];
	next = rp->rx_ring_dma;

	/* Init the ring entries */
	for (i = 0; i < RX_RING_SIZE; i++) {
		rp->rx_ring[i].rx_status = 0;
		rp->rx_ring[i].desc_length = cpu_to_le32(rp->rx_buf_sz);
		next += sizeof(struct rx_desc);
		rp->rx_ring[i].next_desc = cpu_to_le32(next);
		rp->rx_skbuff[i] = NULL;
	}
	/* Mark the last entry as wrapping the ring. */
	rp->rx_ring[i-1].next_desc = cpu_to_le32(rp->rx_ring_dma);

	/* Fill in the Rx buffers.  Handle allocation failure gracefully. */
	for (i = 0; i < RX_RING_SIZE; i++) {
		struct sk_buff *skb = netdev_alloc_skb(dev, rp->rx_buf_sz);
		rp->rx_skbuff[i] = skb;
		if (skb == NULL)
			break;
		skb->dev = dev;                 /* Mark as being used by this device. */

		rp->rx_skbuff_dma[i] =
			pci_map_single(rp->pdev, skb->data, rp->rx_buf_sz,
				       PCI_DMA_FROMDEVICE);

		rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
		rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
	}
	rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
}

static void free_rbufs(struct net_device* dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	int i;

	/* Free all the skbuffs in the Rx queue. */
	for (i = 0; i < RX_RING_SIZE; i++) {
		rp->rx_ring[i].rx_status = 0;
		rp->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
		if (rp->rx_skbuff[i]) {
			pci_unmap_single(rp->pdev,
					 rp->rx_skbuff_dma[i],
					 rp->rx_buf_sz, PCI_DMA_FROMDEVICE);
			dev_kfree_skb(rp->rx_skbuff[i]);
		}
		rp->rx_skbuff[i] = NULL;
	}
}

static void alloc_tbufs(struct net_device* dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	dma_addr_t next;
	int i;

	rp->dirty_tx = rp->cur_tx = 0;
	next = rp->tx_ring_dma;
	for (i = 0; i < TX_RING_SIZE; i++) {
		rp->tx_skbuff[i] = NULL;
		rp->tx_ring[i].tx_status = 0;
		rp->tx_ring[i].desc_length = cpu_to_le32(TXDESC);
		next += sizeof(struct tx_desc);
		rp->tx_ring[i].next_desc = cpu_to_le32(next);
		if (rp->quirks & rqRhineI)
			rp->tx_buf[i] = &rp->tx_bufs[i * PKT_BUF_SZ];
	}
	rp->tx_ring[i-1].next_desc = cpu_to_le32(rp->tx_ring_dma);

}

static void free_tbufs(struct net_device* dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	int i;

	for (i = 0; i < TX_RING_SIZE; i++) {
		rp->tx_ring[i].tx_status = 0;
		rp->tx_ring[i].desc_length = cpu_to_le32(TXDESC);
		rp->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
		if (rp->tx_skbuff[i]) {
			if (rp->tx_skbuff_dma[i]) {
				pci_unmap_single(rp->pdev,
						 rp->tx_skbuff_dma[i],
						 rp->tx_skbuff[i]->len,
						 PCI_DMA_TODEVICE);
			}
			dev_kfree_skb(rp->tx_skbuff[i]);
		}
		rp->tx_skbuff[i] = NULL;
		rp->tx_buf[i] = NULL;
	}
}

static void rhine_check_media(struct net_device *dev, unsigned int init_media)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	mii_check_media(&rp->mii_if, debug, init_media);

	if (rp->mii_if.full_duplex)
	    iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1FDuplex,
		   ioaddr + ChipCmd1);
	else
	    iowrite8(ioread8(ioaddr + ChipCmd1) & ~Cmd1FDuplex,
		   ioaddr + ChipCmd1);
	if (debug > 1)
		printk(KERN_INFO "%s: force_media %d, carrier %d\n", dev->name,
			rp->mii_if.force_media, netif_carrier_ok(dev));
}

/* Called after status of force_media possibly changed */
static void rhine_set_carrier(struct mii_if_info *mii)
{
	if (mii->force_media) {
		/* autoneg is off: Link is always assumed to be up */
		if (!netif_carrier_ok(mii->dev))
			netif_carrier_on(mii->dev);
	}
	else	/* Let MMI library update carrier status */
		rhine_check_media(mii->dev, 0);
	if (debug > 1)
		printk(KERN_INFO "%s: force_media %d, carrier %d\n",
		       mii->dev->name, mii->force_media,
		       netif_carrier_ok(mii->dev));
}

static void init_registers(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	int i;

	for (i = 0; i < 6; i++)
		iowrite8(dev->dev_addr[i], ioaddr + StationAddr + i);

	/* Initialize other registers. */
	iowrite16(0x0006, ioaddr + PCIBusConfig);	/* Tune configuration??? */
	/* Configure initial FIFO thresholds. */
	iowrite8(0x20, ioaddr + TxConfig);
	rp->tx_thresh = 0x20;
	rp->rx_thresh = 0x60;		/* Written in rhine_set_rx_mode(). */

	iowrite32(rp->rx_ring_dma, ioaddr + RxRingPtr);
	iowrite32(rp->tx_ring_dma, ioaddr + TxRingPtr);

	rhine_set_rx_mode(dev);

	napi_enable(&rp->napi);

	/* Enable interrupts by setting the interrupt mask. */
	iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
	       IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
	       IntrTxDone | IntrTxError | IntrTxUnderrun |
	       IntrPCIErr | IntrStatsMax | IntrLinkChange,
	       ioaddr + IntrEnable);

	iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8),
	       ioaddr + ChipCmd);
	rhine_check_media(dev, 1);
}

/* Enable MII link status auto-polling (required for IntrLinkChange) */
static void rhine_enable_linkmon(void __iomem *ioaddr)
{
	iowrite8(0, ioaddr + MIICmd);
	iowrite8(MII_BMSR, ioaddr + MIIRegAddr);
	iowrite8(0x80, ioaddr + MIICmd);

	RHINE_WAIT_FOR((ioread8(ioaddr + MIIRegAddr) & 0x20));

	iowrite8(MII_BMSR | 0x40, ioaddr + MIIRegAddr);
}

/* Disable MII link status auto-polling (required for MDIO access) */
static void rhine_disable_linkmon(void __iomem *ioaddr, u32 quirks)
{
	iowrite8(0, ioaddr + MIICmd);

	if (quirks & rqRhineI) {
		iowrite8(0x01, ioaddr + MIIRegAddr);	// MII_BMSR

		/* Can be called from ISR. Evil. */
		mdelay(1);

		/* 0x80 must be set immediately before turning it off */
		iowrite8(0x80, ioaddr + MIICmd);

		RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x20);

		/* Heh. Now clear 0x80 again. */
		iowrite8(0, ioaddr + MIICmd);
	}
	else
		RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x80);
}

/* Read and write over the MII Management Data I/O (MDIO) interface. */

static int mdio_read(struct net_device *dev, int phy_id, int regnum)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	int result;

	rhine_disable_linkmon(ioaddr, rp->quirks);

	/* rhine_disable_linkmon already cleared MIICmd */
	iowrite8(phy_id, ioaddr + MIIPhyAddr);
	iowrite8(regnum, ioaddr + MIIRegAddr);
	iowrite8(0x40, ioaddr + MIICmd);		/* Trigger read */
	RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x40));
	result = ioread16(ioaddr + MIIData);

	rhine_enable_linkmon(ioaddr);
	return result;
}

static void mdio_write(struct net_device *dev, int phy_id, int regnum, int value)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	rhine_disable_linkmon(ioaddr, rp->quirks);

	/* rhine_disable_linkmon already cleared MIICmd */
	iowrite8(phy_id, ioaddr + MIIPhyAddr);
	iowrite8(regnum, ioaddr + MIIRegAddr);
	iowrite16(value, ioaddr + MIIData);
	iowrite8(0x20, ioaddr + MIICmd);		/* Trigger write */
	RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x20));

	rhine_enable_linkmon(ioaddr);
}

static int rhine_open(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	int rc;

	rc = request_irq(rp->pdev->irq, &rhine_interrupt, IRQF_SHARED, dev->name,
			dev);
	if (rc)
		return rc;

	if (debug > 1)
		printk(KERN_DEBUG "%s: rhine_open() irq %d.\n",
		       dev->name, rp->pdev->irq);

	rc = alloc_ring(dev);
	if (rc) {
		free_irq(rp->pdev->irq, dev);
		return rc;
	}
	alloc_rbufs(dev);
	alloc_tbufs(dev);
	rhine_chip_reset(dev);
	init_registers(dev);
	if (debug > 2)
		printk(KERN_DEBUG "%s: Done rhine_open(), status %4.4x "
		       "MII status: %4.4x.\n",
		       dev->name, ioread16(ioaddr + ChipCmd),
		       mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));

	netif_start_queue(dev);

	return 0;
}

static void rhine_tx_timeout(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	printk(KERN_WARNING "%s: Transmit timed out, status %4.4x, PHY status "
	       "%4.4x, resetting...\n",
	       dev->name, ioread16(ioaddr + IntrStatus),
	       mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));

	/* protect against concurrent rx interrupts */
	disable_irq(rp->pdev->irq);

	napi_disable(&rp->napi);

	spin_lock(&rp->lock);

	/* clear all descriptors */
	free_tbufs(dev);
	free_rbufs(dev);
	alloc_tbufs(dev);
	alloc_rbufs(dev);

	/* Reinitialize the hardware. */
	rhine_chip_reset(dev);
	init_registers(dev);

	spin_unlock(&rp->lock);
	enable_irq(rp->pdev->irq);

	dev->trans_start = jiffies;
	rp->stats.tx_errors++;
	netif_wake_queue(dev);
}

static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	unsigned entry;

	/* Caution: the write order is important here, set the field
	   with the "ownership" bits last. */

	/* Calculate the next Tx descriptor entry. */
	entry = rp->cur_tx % TX_RING_SIZE;

	if (skb_padto(skb, ETH_ZLEN))
		return 0;

	rp->tx_skbuff[entry] = skb;

	if ((rp->quirks & rqRhineI) &&
	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) {
		/* Must use alignment buffer. */
		if (skb->len > PKT_BUF_SZ) {
			/* packet too long, drop it */
			dev_kfree_skb(skb);
			rp->tx_skbuff[entry] = NULL;
			rp->stats.tx_dropped++;
			return 0;
		}

		/* Padding is not copied and so must be redone. */
		skb_copy_and_csum_dev(skb, rp->tx_buf[entry]);
		if (skb->len < ETH_ZLEN)
			memset(rp->tx_buf[entry] + skb->len, 0,
			       ETH_ZLEN - skb->len);
		rp->tx_skbuff_dma[entry] = 0;
		rp->tx_ring[entry].addr = cpu_to_le32(rp->tx_bufs_dma +
						      (rp->tx_buf[entry] -
						       rp->tx_bufs));
	} else {
		rp->tx_skbuff_dma[entry] =
			pci_map_single(rp->pdev, skb->data, skb->len,
				       PCI_DMA_TODEVICE);
		rp->tx_ring[entry].addr = cpu_to_le32(rp->tx_skbuff_dma[entry]);
	}

	rp->tx_ring[entry].desc_length =
		cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN));

	/* lock eth irq */
	spin_lock_irq(&rp->lock);
	wmb();
	rp->tx_ring[entry].tx_status = cpu_to_le32(DescOwn);
	wmb();

	rp->cur_tx++;

	/* Non-x86 Todo: explicitly flush cache lines here. */

	/* Wake the potentially-idle transmit channel */
	iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1TxDemand,
	       ioaddr + ChipCmd1);
	IOSYNC;

	if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
		netif_stop_queue(dev);

	dev->trans_start = jiffies;

	spin_unlock_irq(&rp->lock);

	if (debug > 4) {
		printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n",
		       dev->name, rp->cur_tx-1, entry);
	}
	return 0;
}

/* The interrupt handler does all of the Rx thread work and cleans up
   after the Tx thread. */
static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
{
	struct net_device *dev = dev_instance;
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	u32 intr_status;
	int boguscnt = max_interrupt_work;
	int handled = 0;

	while ((intr_status = get_intr_status(dev))) {
		handled = 1;

		/* Acknowledge all of the current interrupt sources ASAP. */
		if (intr_status & IntrTxDescRace)
			iowrite8(0x08, ioaddr + IntrStatus2);
		iowrite16(intr_status & 0xffff, ioaddr + IntrStatus);
		IOSYNC;

		if (debug > 4)
			printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n",
			       dev->name, intr_status);

		if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped |
				   IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf)) {
			iowrite16(IntrTxAborted |
				  IntrTxDone | IntrTxError | IntrTxUnderrun |
				  IntrPCIErr | IntrStatsMax | IntrLinkChange,
				  ioaddr + IntrEnable);

			napi_schedule(&rp->napi);
		}

		if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
			if (intr_status & IntrTxErrSummary) {
				/* Avoid scavenging before Tx engine turned off */
				RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
				if (debug > 2 &&
				    ioread8(ioaddr+ChipCmd) & CmdTxOn)
					printk(KERN_WARNING "%s: "
					       "rhine_interrupt() Tx engine "
					       "still on.\n", dev->name);
			}
			rhine_tx(dev);
		}

		/* Abnormal error summary/uncommon events handlers. */
		if (intr_status & (IntrPCIErr | IntrLinkChange |
				   IntrStatsMax | IntrTxError | IntrTxAborted |
				   IntrTxUnderrun | IntrTxDescRace))
			rhine_error(dev, intr_status);

		if (--boguscnt < 0) {
			printk(KERN_WARNING "%s: Too much work at interrupt, "
			       "status=%#8.8x.\n",
			       dev->name, intr_status);
			break;
		}
	}

	if (debug > 3)
		printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n",
		       dev->name, ioread16(ioaddr + IntrStatus));
	return IRQ_RETVAL(handled);
}

/* This routine is logically part of the interrupt handler, but isolated
   for clarity. */
static void rhine_tx(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;

	spin_lock(&rp->lock);

	/* find and cleanup dirty tx descriptors */
	while (rp->dirty_tx != rp->cur_tx) {
		txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
		if (debug > 6)
			printk(KERN_DEBUG "Tx scavenge %d status %8.8x.\n",
			       entry, txstatus);
		if (txstatus & DescOwn)
			break;
		if (txstatus & 0x8000) {
			if (debug > 1)
				printk(KERN_DEBUG "%s: Transmit error, "
				       "Tx status %8.8x.\n",
				       dev->name, txstatus);
			rp->stats.tx_errors++;
			if (txstatus & 0x0400) rp->stats.tx_carrier_errors++;
			if (txstatus & 0x0200) rp->stats.tx_window_errors++;
			if (txstatus & 0x0100) rp->stats.tx_aborted_errors++;
			if (txstatus & 0x0080) rp->stats.tx_heartbeat_errors++;
			if (((rp->quirks & rqRhineI) && txstatus & 0x0002) ||
			    (txstatus & 0x0800) || (txstatus & 0x1000)) {
				rp->stats.tx_fifo_errors++;
				rp->tx_ring[entry].tx_status = cpu_to_le32(DescOwn);
				break; /* Keep the skb - we try again */
			}
			/* Transmitter restarted in 'abnormal' handler. */
		} else {
			if (rp->quirks & rqRhineI)
				rp->stats.collisions += (txstatus >> 3) & 0x0F;
			else
				rp->stats.collisions += txstatus & 0x0F;
			if (debug > 6)
				printk(KERN_DEBUG "collisions: %1.1x:%1.1x\n",
				       (txstatus >> 3) & 0xF,
				       txstatus & 0xF);
			rp->stats.tx_bytes += rp->tx_skbuff[entry]->len;
			rp->stats.tx_packets++;
		}
		/* Free the original skb. */
		if (rp->tx_skbuff_dma[entry]) {
			pci_unmap_single(rp->pdev,
					 rp->tx_skbuff_dma[entry],
					 rp->tx_skbuff[entry]->len,
					 PCI_DMA_TODEVICE);
		}
		dev_kfree_skb_irq(rp->tx_skbuff[entry]);
		rp->tx_skbuff[entry] = NULL;
		entry = (++rp->dirty_tx) % TX_RING_SIZE;
	}
	if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
		netif_wake_queue(dev);

	spin_unlock(&rp->lock);
}

/* Process up to limit frames from receive ring */
static int rhine_rx(struct net_device *dev, int limit)
{
	struct rhine_private *rp = netdev_priv(dev);
	int count;
	int entry = rp->cur_rx % RX_RING_SIZE;

	if (debug > 4) {
		printk(KERN_DEBUG "%s: rhine_rx(), entry %d status %8.8x.\n",
		       dev->name, entry,
		       le32_to_cpu(rp->rx_head_desc->rx_status));
	}

	/* If EOP is set on the next entry, it's a new packet. Send it up. */
	for (count = 0; count < limit; ++count) {
		struct rx_desc *desc = rp->rx_head_desc;
		u32 desc_status = le32_to_cpu(desc->rx_status);
		int data_size = desc_status >> 16;

		if (desc_status & DescOwn)
			break;

		if (debug > 4)
			printk(KERN_DEBUG "rhine_rx() status is %8.8x.\n",
			       desc_status);

		if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) {
			if ((desc_status & RxWholePkt) != RxWholePkt) {
				printk(KERN_WARNING "%s: Oversized Ethernet "
				       "frame spanned multiple buffers, entry "
				       "%#x length %d status %8.8x!\n",
				       dev->name, entry, data_size,
				       desc_status);
				printk(KERN_WARNING "%s: Oversized Ethernet "
				       "frame %p vs %p.\n", dev->name,
				       rp->rx_head_desc, &rp->rx_ring[entry]);
				rp->stats.rx_length_errors++;
			} else if (desc_status & RxErr) {
				/* There was a error. */
				if (debug > 2)
					printk(KERN_DEBUG "rhine_rx() Rx "
					       "error was %8.8x.\n",
					       desc_status);
				rp->stats.rx_errors++;
				if (desc_status & 0x0030) rp->stats.rx_length_errors++;
				if (desc_status & 0x0048) rp->stats.rx_fifo_errors++;
				if (desc_status & 0x0004) rp->stats.rx_frame_errors++;
				if (desc_status & 0x0002) {
					/* this can also be updated outside the interrupt handler */
					spin_lock(&rp->lock);
					rp->stats.rx_crc_errors++;
					spin_unlock(&rp->lock);
				}
			}
		} else {
			struct sk_buff *skb;
			/* Length should omit the CRC */
			int pkt_len = data_size - 4;

			/* Check if the packet is long enough to accept without
			   copying to a minimally-sized skbuff. */
			if (pkt_len < rx_copybreak &&
				(skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN)) != NULL) {
				skb_reserve(skb, NET_IP_ALIGN);	/* 16 byte align the IP header */
				pci_dma_sync_single_for_cpu(rp->pdev,
							    rp->rx_skbuff_dma[entry],
							    rp->rx_buf_sz,
							    PCI_DMA_FROMDEVICE);

				skb_copy_to_linear_data(skb,
						 rp->rx_skbuff[entry]->data,
						 pkt_len);
				skb_put(skb, pkt_len);
				pci_dma_sync_single_for_device(rp->pdev,
							       rp->rx_skbuff_dma[entry],
							       rp->rx_buf_sz,
							       PCI_DMA_FROMDEVICE);
			} else {
				skb = rp->rx_skbuff[entry];
				if (skb == NULL) {
					printk(KERN_ERR "%s: Inconsistent Rx "
					       "descriptor chain.\n",
					       dev->name);
					break;
				}
				rp->rx_skbuff[entry] = NULL;
				skb_put(skb, pkt_len);
				pci_unmap_single(rp->pdev,
						 rp->rx_skbuff_dma[entry],
						 rp->rx_buf_sz,
						 PCI_DMA_FROMDEVICE);
			}
			skb->protocol = eth_type_trans(skb, dev);
			netif_receive_skb(skb);
			rp->stats.rx_bytes += pkt_len;
			rp->stats.rx_packets++;
		}
		entry = (++rp->cur_rx) % RX_RING_SIZE;
		rp->rx_head_desc = &rp->rx_ring[entry];
	}

	/* Refill the Rx ring buffers. */
	for (; rp->cur_rx - rp->dirty_rx > 0; rp->dirty_rx++) {
		struct sk_buff *skb;
		entry = rp->dirty_rx % RX_RING_SIZE;
		if (rp->rx_skbuff[entry] == NULL) {
			skb = netdev_alloc_skb(dev, rp->rx_buf_sz);
			rp->rx_skbuff[entry] = skb;
			if (skb == NULL)
				break;	/* Better luck next round. */
			skb->dev = dev;	/* Mark as being used by this device. */
			rp->rx_skbuff_dma[entry] =
				pci_map_single(rp->pdev, skb->data,
					       rp->rx_buf_sz,
					       PCI_DMA_FROMDEVICE);
			rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]);
		}
		rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
	}

	return count;
}

/*
 * Clears the "tally counters" for CRC errors and missed frames(?).
 * It has been reported that some chips need a write of 0 to clear
 * these, for others the counters are set to 1 when written to and
 * instead cleared when read. So we clear them both ways ...
 */
static inline void clear_tally_counters(void __iomem *ioaddr)
{
	iowrite32(0, ioaddr + RxMissed);
	ioread16(ioaddr + RxCRCErrs);
	ioread16(ioaddr + RxMissed);
}

static void rhine_restart_tx(struct net_device *dev) {
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	int entry = rp->dirty_tx % TX_RING_SIZE;
	u32 intr_status;

	/*
	 * If new errors occured, we need to sort them out before doing Tx.
	 * In that case the ISR will be back here RSN anyway.
	 */
	intr_status = get_intr_status(dev);

	if ((intr_status & IntrTxErrSummary) == 0) {

		/* We know better than the chip where it should continue. */
		iowrite32(rp->tx_ring_dma + entry * sizeof(struct tx_desc),
		       ioaddr + TxRingPtr);

		iowrite8(ioread8(ioaddr + ChipCmd) | CmdTxOn,
		       ioaddr + ChipCmd);
		iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1TxDemand,
		       ioaddr + ChipCmd1);
		IOSYNC;
	}
	else {
		/* This should never happen */
		if (debug > 1)
			printk(KERN_WARNING "%s: rhine_restart_tx() "
			       "Another error occured %8.8x.\n",
			       dev->name, intr_status);
	}

}

static void rhine_error(struct net_device *dev, int intr_status)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	spin_lock(&rp->lock);

	if (intr_status & IntrLinkChange)
		rhine_check_media(dev, 0);
	if (intr_status & IntrStatsMax) {
		rp->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs);
		rp->stats.rx_missed_errors += ioread16(ioaddr + RxMissed);
		clear_tally_counters(ioaddr);
	}
	if (intr_status & IntrTxAborted) {
		if (debug > 1)
			printk(KERN_INFO "%s: Abort %8.8x, frame dropped.\n",
			       dev->name, intr_status);
	}
	if (intr_status & IntrTxUnderrun) {
		if (rp->tx_thresh < 0xE0)
			iowrite8(rp->tx_thresh += 0x20, ioaddr + TxConfig);
		if (debug > 1)
			printk(KERN_INFO "%s: Transmitter underrun, Tx "
			       "threshold now %2.2x.\n",
			       dev->name, rp->tx_thresh);
	}
	if (intr_status & IntrTxDescRace) {
		if (debug > 2)
			printk(KERN_INFO "%s: Tx descriptor write-back race.\n",
			       dev->name);
	}
	if ((intr_status & IntrTxError) &&
	    (intr_status & (IntrTxAborted |
	     IntrTxUnderrun | IntrTxDescRace)) == 0) {
		if (rp->tx_thresh < 0xE0) {
			iowrite8(rp->tx_thresh += 0x20, ioaddr + TxConfig);
		}
		if (debug > 1)
			printk(KERN_INFO "%s: Unspecified error. Tx "
			       "threshold now %2.2x.\n",
			       dev->name, rp->tx_thresh);
	}
	if (intr_status & (IntrTxAborted | IntrTxUnderrun | IntrTxDescRace |
			   IntrTxError))
		rhine_restart_tx(dev);

	if (intr_status & ~(IntrLinkChange | IntrStatsMax | IntrTxUnderrun |
			    IntrTxError | IntrTxAborted | IntrNormalSummary |
			    IntrTxDescRace)) {
		if (debug > 1)
			printk(KERN_ERR "%s: Something Wicked happened! "
			       "%8.8x.\n", dev->name, intr_status);
	}

	spin_unlock(&rp->lock);
}

static struct net_device_stats *rhine_get_stats(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	unsigned long flags;

	spin_lock_irqsave(&rp->lock, flags);
	rp->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs);
	rp->stats.rx_missed_errors += ioread16(ioaddr + RxMissed);
	clear_tally_counters(ioaddr);
	spin_unlock_irqrestore(&rp->lock, flags);

	return &rp->stats;
}

static void rhine_set_rx_mode(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;
	u32 mc_filter[2];	/* Multicast hash filter */
	u8 rx_mode;		/* Note: 0x02=accept runt, 0x01=accept errs */

	if (dev->flags & IFF_PROMISC) {		/* Set promiscuous. */
		rx_mode = 0x1C;
		iowrite32(0xffffffff, ioaddr + MulticastFilter0);
		iowrite32(0xffffffff, ioaddr + MulticastFilter1);
	} else if ((dev->mc_count > multicast_filter_limit)
		   || (dev->flags & IFF_ALLMULTI)) {
		/* Too many to match, or accept all multicasts. */
		iowrite32(0xffffffff, ioaddr + MulticastFilter0);
		iowrite32(0xffffffff, ioaddr + MulticastFilter1);
		rx_mode = 0x0C;
	} else {
		struct dev_mc_list *mclist;
		int i;
		memset(mc_filter, 0, sizeof(mc_filter));
		for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
		     i++, mclist = mclist->next) {
			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;

			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
		}
		iowrite32(mc_filter[0], ioaddr + MulticastFilter0);
		iowrite32(mc_filter[1], ioaddr + MulticastFilter1);
		rx_mode = 0x0C;
	}
	iowrite8(rp->rx_thresh | rx_mode, ioaddr + RxConfig);
}

static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	struct rhine_private *rp = netdev_priv(dev);

	strcpy(info->driver, DRV_NAME);
	strcpy(info->version, DRV_VERSION);
	strcpy(info->bus_info, pci_name(rp->pdev));
}

static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	struct rhine_private *rp = netdev_priv(dev);
	int rc;

	spin_lock_irq(&rp->lock);
	rc = mii_ethtool_gset(&rp->mii_if, cmd);
	spin_unlock_irq(&rp->lock);

	return rc;
}

static int netdev_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	struct rhine_private *rp = netdev_priv(dev);
	int rc;

	spin_lock_irq(&rp->lock);
	rc = mii_ethtool_sset(&rp->mii_if, cmd);
	spin_unlock_irq(&rp->lock);
	rhine_set_carrier(&rp->mii_if);

	return rc;
}

static int netdev_nway_reset(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);

	return mii_nway_restart(&rp->mii_if);
}

static u32 netdev_get_link(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);

	return mii_link_ok(&rp->mii_if);
}

static u32 netdev_get_msglevel(struct net_device *dev)
{
	return debug;
}

static void netdev_set_msglevel(struct net_device *dev, u32 value)
{
	debug = value;
}

static void rhine_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	struct rhine_private *rp = netdev_priv(dev);

	if (!(rp->quirks & rqWOL))
		return;

	spin_lock_irq(&rp->lock);
	wol->supported = WAKE_PHY | WAKE_MAGIC |
			 WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;	/* Untested */
	wol->wolopts = rp->wolopts;
	spin_unlock_irq(&rp->lock);
}

static int rhine_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	struct rhine_private *rp = netdev_priv(dev);
	u32 support = WAKE_PHY | WAKE_MAGIC |
		      WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;	/* Untested */

	if (!(rp->quirks & rqWOL))
		return -EINVAL;

	if (wol->wolopts & ~support)
		return -EINVAL;

	spin_lock_irq(&rp->lock);
	rp->wolopts = wol->wolopts;
	spin_unlock_irq(&rp->lock);

	return 0;
}

static const struct ethtool_ops netdev_ethtool_ops = {
	.get_drvinfo		= netdev_get_drvinfo,
	.get_settings		= netdev_get_settings,
	.set_settings		= netdev_set_settings,
	.nway_reset		= netdev_nway_reset,
	.get_link		= netdev_get_link,
	.get_msglevel		= netdev_get_msglevel,
	.set_msglevel		= netdev_set_msglevel,
	.get_wol		= rhine_get_wol,
	.set_wol		= rhine_set_wol,
};

static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct rhine_private *rp = netdev_priv(dev);
	int rc;

	if (!netif_running(dev))
		return -EINVAL;

	spin_lock_irq(&rp->lock);
	rc = generic_mii_ioctl(&rp->mii_if, if_mii(rq), cmd, NULL);
	spin_unlock_irq(&rp->lock);
	rhine_set_carrier(&rp->mii_if);

	return rc;
}

static int rhine_close(struct net_device *dev)
{
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	spin_lock_irq(&rp->lock);

	netif_stop_queue(dev);
	napi_disable(&rp->napi);

	if (debug > 1)
		printk(KERN_DEBUG "%s: Shutting down ethercard, "
		       "status was %4.4x.\n",
		       dev->name, ioread16(ioaddr + ChipCmd));

	/* Switch to loopback mode to avoid hardware races. */
	iowrite8(rp->tx_thresh | 0x02, ioaddr + TxConfig);

	/* Disable interrupts by clearing the interrupt mask. */
	iowrite16(0x0000, ioaddr + IntrEnable);

	/* Stop the chip's Tx and Rx processes. */
	iowrite16(CmdStop, ioaddr + ChipCmd);

	spin_unlock_irq(&rp->lock);

	free_irq(rp->pdev->irq, dev);
	free_rbufs(dev);
	free_tbufs(dev);
	free_ring(dev);

	return 0;
}


static void __devexit rhine_remove_one(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);
	struct rhine_private *rp = netdev_priv(dev);

	unregister_netdev(dev);

	pci_iounmap(pdev, rp->base);
	pci_release_regions(pdev);

	free_netdev(dev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static void rhine_shutdown (struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);
	struct rhine_private *rp = netdev_priv(dev);
	void __iomem *ioaddr = rp->base;

	if (!(rp->quirks & rqWOL))
		return; /* Nothing to do for non-WOL adapters */

	rhine_power_init(dev);

	/* Make sure we use pattern 0, 1 and not 4, 5 */
	if (rp->quirks & rq6patterns)
		iowrite8(0x04, ioaddr + WOLcgClr);

	if (rp->wolopts & WAKE_MAGIC) {
		iowrite8(WOLmagic, ioaddr + WOLcrSet);
		/*
		 * Turn EEPROM-controlled wake-up back on -- some hardware may
		 * not cooperate otherwise.
		 */
		iowrite8(ioread8(ioaddr + ConfigA) | 0x03, ioaddr + ConfigA);
	}

	if (rp->wolopts & (WAKE_BCAST|WAKE_MCAST))
		iowrite8(WOLbmcast, ioaddr + WOLcgSet);

	if (rp->wolopts & WAKE_PHY)
		iowrite8(WOLlnkon | WOLlnkoff, ioaddr + WOLcrSet);

	if (rp->wolopts & WAKE_UCAST)
		iowrite8(WOLucast, ioaddr + WOLcrSet);

	if (rp->wolopts) {
		/* Enable legacy WOL (for old motherboards) */
		iowrite8(0x01, ioaddr + PwcfgSet);
		iowrite8(ioread8(ioaddr + StickyHW) | 0x04, ioaddr + StickyHW);
	}

	/* Hit power state D3 (sleep) */
	if (!avoid_D3)
		iowrite8(ioread8(ioaddr + StickyHW) | 0x03, ioaddr + StickyHW);

	/* TODO: Check use of pci_enable_wake() */

}

#ifdef CONFIG_PM
static int rhine_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);
	struct rhine_private *rp = netdev_priv(dev);
	unsigned long flags;

	if (!netif_running(dev))
		return 0;

	napi_disable(&rp->napi);

	netif_device_detach(dev);
	pci_save_state(pdev);

	spin_lock_irqsave(&rp->lock, flags);
	rhine_shutdown(pdev);
	spin_unlock_irqrestore(&rp->lock, flags);

	free_irq(dev->irq, dev);
	return 0;
}

static int rhine_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);
	struct rhine_private *rp = netdev_priv(dev);
	unsigned long flags;
	int ret;

	if (!netif_running(dev))
		return 0;

        if (request_irq(dev->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev))
		printk(KERN_ERR "via-rhine %s: request_irq failed\n", dev->name);

	ret = pci_set_power_state(pdev, PCI_D0);
	if (debug > 1)
		printk(KERN_INFO "%s: Entering power state D0 %s (%d).\n",
			dev->name, ret ? "failed" : "succeeded", ret);

	pci_restore_state(pdev);

	spin_lock_irqsave(&rp->lock, flags);
#ifdef USE_MMIO
	enable_mmio(rp->pioaddr, rp->quirks);
#endif
	rhine_power_init(dev);
	free_tbufs(dev);
	free_rbufs(dev);
	alloc_tbufs(dev);
	alloc_rbufs(dev);
	init_registers(dev);
	spin_unlock_irqrestore(&rp->lock, flags);

	netif_device_attach(dev);

	return 0;
}
#endif /* CONFIG_PM */

static struct pci_driver rhine_driver = {
	.name		= DRV_NAME,
	.id_table	= rhine_pci_tbl,
	.probe		= rhine_init_one,
	.remove		= __devexit_p(rhine_remove_one),
#ifdef CONFIG_PM
	.suspend	= rhine_suspend,
	.resume		= rhine_resume,
#endif /* CONFIG_PM */
	.shutdown =	rhine_shutdown,
};

static struct dmi_system_id __initdata rhine_dmi_table[] = {
	{
		.ident = "EPIA-M",
		.matches = {
			DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
			DMI_MATCH(DMI_BIOS_VERSION, "6.00 PG"),
		},
	},
	{
		.ident = "KV7",
		.matches = {
			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
			DMI_MATCH(DMI_BIOS_VERSION, "6.00 PG"),
		},
	},
	{ NULL }
};

static int __init rhine_init(void)
{
/* when a module, this is printed whether or not devices are found in probe */
#ifdef MODULE
	printk(version);
#endif
	if (dmi_check_system(rhine_dmi_table)) {
		/* these BIOSes fail at PXE boot if chip is in D3 */
		avoid_D3 = 1;
		printk(KERN_WARNING "%s: Broken BIOS detected, avoid_D3 "
				    "enabled.\n",
		       DRV_NAME);
	}
	else if (avoid_D3)
		printk(KERN_INFO "%s: avoid_D3 set.\n", DRV_NAME);

	return pci_register_driver(&rhine_driver);
}


static void __exit rhine_cleanup(void)
{
	pci_unregister_driver(&rhine_driver);
}


module_init(rhine_init);
module_exit(rhine_cleanup);