aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ubifs/commit.c
blob: b49884c8c10e823cbe484e9d5c1fadddf16b5008 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements functions that manage the running of the commit process.
 * Each affected module has its own functions to accomplish their part in the
 * commit and those functions are called here.
 *
 * The commit is the process whereby all updates to the index and LEB properties
 * are written out together and the journal becomes empty. This keeps the
 * file system consistent - at all times the state can be recreated by reading
 * the index and LEB properties and then replaying the journal.
 *
 * The commit is split into two parts named "commit start" and "commit end".
 * During commit start, the commit process has exclusive access to the journal
 * by holding the commit semaphore down for writing. As few I/O operations as
 * possible are performed during commit start, instead the nodes that are to be
 * written are merely identified. During commit end, the commit semaphore is no
 * longer held and the journal is again in operation, allowing users to continue
 * to use the file system while the bulk of the commit I/O is performed. The
 * purpose of this two-step approach is to prevent the commit from causing any
 * latency blips. Note that in any case, the commit does not prevent lookups
 * (as permitted by the TNC mutex), or access to VFS data structures e.g. page
 * cache.
 */

#include <linux/freezer.h>
#include <linux/kthread.h>
#include "ubifs.h"

/**
 * do_commit - commit the journal.
 * @c: UBIFS file-system description object
 *
 * This function implements UBIFS commit. It has to be called with commit lock
 * locked. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int do_commit(struct ubifs_info *c)
{
	int err, new_ltail_lnum, old_ltail_lnum, i;
	struct ubifs_zbranch zroot;
	struct ubifs_lp_stats lst;

	dbg_cmt("start");
	if (c->ro_media) {
		err = -EROFS;
		goto out_up;
	}

	/* Sync all write buffers (necessary for recovery) */
	for (i = 0; i < c->jhead_cnt; i++) {
		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
		if (err)
			goto out_up;
	}

	c->cmt_no += 1;
	err = ubifs_gc_start_commit(c);
	if (err)
		goto out_up;
	err = dbg_check_lprops(c);
	if (err)
		goto out_up;
	err = ubifs_log_start_commit(c, &new_ltail_lnum);
	if (err)
		goto out_up;
	err = ubifs_tnc_start_commit(c, &zroot);
	if (err)
		goto out_up;
	err = ubifs_lpt_start_commit(c);
	if (err)
		goto out_up;
	err = ubifs_orphan_start_commit(c);
	if (err)
		goto out_up;

	ubifs_get_lp_stats(c, &lst);

	up_write(&c->commit_sem);

	err = ubifs_tnc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_end_commit(c);
	if (err)
		goto out;
	err = ubifs_orphan_end_commit(c);
	if (err)
		goto out;
	old_ltail_lnum = c->ltail_lnum;
	err = ubifs_log_end_commit(c, new_ltail_lnum);
	if (err)
		goto out;
	err = dbg_check_old_index(c, &zroot);
	if (err)
		goto out;

	mutex_lock(&c->mst_mutex);
	c->mst_node->cmt_no      = cpu_to_le64(c->cmt_no);
	c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
	c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
	c->mst_node->root_offs   = cpu_to_le32(zroot.offs);
	c->mst_node->root_len    = cpu_to_le32(zroot.len);
	c->mst_node->ihead_lnum  = cpu_to_le32(c->ihead_lnum);
	c->mst_node->ihead_offs  = cpu_to_le32(c->ihead_offs);
	c->mst_node->index_size  = cpu_to_le64(c->old_idx_sz);
	c->mst_node->lpt_lnum    = cpu_to_le32(c->lpt_lnum);
	c->mst_node->lpt_offs    = cpu_to_le32(c->lpt_offs);
	c->mst_node->nhead_lnum  = cpu_to_le32(c->nhead_lnum);
	c->mst_node->nhead_offs  = cpu_to_le32(c->nhead_offs);
	c->mst_node->ltab_lnum   = cpu_to_le32(c->ltab_lnum);
	c->mst_node->ltab_offs   = cpu_to_le32(c->ltab_offs);
	c->mst_node->lsave_lnum  = cpu_to_le32(c->lsave_lnum);
	c->mst_node->lsave_offs  = cpu_to_le32(c->lsave_offs);
	c->mst_node->lscan_lnum  = cpu_to_le32(c->lscan_lnum);
	c->mst_node->empty_lebs  = cpu_to_le32(lst.empty_lebs);
	c->mst_node->idx_lebs    = cpu_to_le32(lst.idx_lebs);
	c->mst_node->total_free  = cpu_to_le64(lst.total_free);
	c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
	c->mst_node->total_used  = cpu_to_le64(lst.total_used);
	c->mst_node->total_dead  = cpu_to_le64(lst.total_dead);
	c->mst_node->total_dark  = cpu_to_le64(lst.total_dark);
	if (c->no_orphs)
		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
	else
		c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
	err = ubifs_write_master(c);
	mutex_unlock(&c->mst_mutex);
	if (err)
		goto out;

	err = ubifs_log_post_commit(c, old_ltail_lnum);
	if (err)
		goto out;
	err = ubifs_gc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_post_commit(c);
	if (err)
		goto out;

	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_RESTING;
	wake_up(&c->cmt_wq);
	dbg_cmt("commit end");
	spin_unlock(&c->cs_lock);

	return 0;

out_up:
	up_write(&c->commit_sem);
out:
	ubifs_err("commit failed, error %d", err);
	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_BROKEN;
	wake_up(&c->cmt_wq);
	spin_unlock(&c->cs_lock);
	ubifs_ro_mode(c, err);
	return err;
}

/**
 * run_bg_commit - run background commit if it is needed.
 * @c: UBIFS file-system description object
 *
 * This function runs background commit if it is needed. Returns zero in case
 * of success and a negative error code in case of failure.
 */
static int run_bg_commit(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	/*
	 * Run background commit only if background commit was requested or if
	 * commit is required.
	 */
	if (c->cmt_state != COMMIT_BACKGROUND &&
	    c->cmt_state != COMMIT_REQUIRED)
		goto out;
	spin_unlock(&c->cs_lock);

	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_REQUIRED)
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
	else if (c->cmt_state == COMMIT_BACKGROUND)
		c->cmt_state = COMMIT_RUNNING_BACKGROUND;
	else
		goto out_cmt_unlock;
	spin_unlock(&c->cs_lock);

	return do_commit(c);

out_cmt_unlock:
	up_write(&c->commit_sem);
out:
	spin_unlock(&c->cs_lock);
	return 0;
}

/**
 * ubifs_bg_thread - UBIFS background thread function.
 * @info: points to the file-system description object
 *
 * This function implements various file-system background activities:
 * o when a write-buffer timer expires it synchronizes the appropriate
 *   write-buffer;
 * o when the journal is about to be full, it starts in-advance commit.
 *
 * Note, other stuff like background garbage collection may be added here in
 * future.
 */
int ubifs_bg_thread(void *info)
{
	int err;
	struct ubifs_info *c = info;

	dbg_msg("background thread \"%s\" started, PID %d",
		c->bgt_name, current->pid);
	set_freezable();

	while (1) {
		if (kthread_should_stop())
			break;

		if (try_to_freeze())
			continue;

		set_current_state(TASK_INTERRUPTIBLE);
		/* Check if there is something to do */
		if (!c->need_bgt) {
			/*
			 * Nothing prevents us from going sleep now and
			 * be never woken up and block the task which
			 * could wait in 'kthread_stop()' forever.
			 */
			if (kthread_should_stop())
				break;
			schedule();
			continue;
		} else
			__set_current_state(TASK_RUNNING);

		c->need_bgt = 0;
		err = ubifs_bg_wbufs_sync(c);
		if (err)
			ubifs_ro_mode(c, err);

		run_bg_commit(c);
		cond_resched();
	}

	dbg_msg("background thread \"%s\" stops", c->bgt_name);
	return 0;
}

/**
 * ubifs_commit_required - set commit state to "required".
 * @c: UBIFS file-system description object
 *
 * This function is called if a commit is required but cannot be done from the
 * calling function, so it is just flagged instead.
 */
void ubifs_commit_required(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	switch (c->cmt_state) {
	case COMMIT_RESTING:
	case COMMIT_BACKGROUND:
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_REQUIRED));
		c->cmt_state = COMMIT_REQUIRED;
		break;
	case COMMIT_RUNNING_BACKGROUND:
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_RUNNING_REQUIRED));
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
		break;
	case COMMIT_REQUIRED:
	case COMMIT_RUNNING_REQUIRED:
	case COMMIT_BROKEN:
		break;
	}
	spin_unlock(&c->cs_lock);
}

/**
 * ubifs_request_bg_commit - notify the background thread to do a commit.
 * @c: UBIFS file-system description object
 *
 * This function is called if the journal is full enough to make a commit
 * worthwhile, so background thread is kicked to start it.
 */
void ubifs_request_bg_commit(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_RESTING) {
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_BACKGROUND));
		c->cmt_state = COMMIT_BACKGROUND;
		spin_unlock(&c->cs_lock);
		ubifs_wake_up_bgt(c);
	} else
		spin_unlock(&c->cs_lock);
}

/**
 * wait_for_commit - wait for commit.
 * @c: UBIFS file-system description object
 *
 * This function sleeps until the commit operation is no longer running.
 */
static int wait_for_commit(struct ubifs_info *c)
{
	dbg_cmt("pid %d goes sleep", current->pid);

	/*
	 * The following sleeps if the condition is false, and will be woken
	 * when the commit ends. It is possible, although very unlikely, that we
	 * will wake up and see the subsequent commit running, rather than the
	 * one we were waiting for, and go back to sleep.  However, we will be
	 * woken again, so there is no danger of sleeping forever.
	 */
	wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND &&
			      c->cmt_state != COMMIT_RUNNING_REQUIRED);
	dbg_cmt("commit finished, pid %d woke up", current->pid);
	return 0;
}

/**
 * ubifs_run_commit - run or wait for commit.
 * @c: UBIFS file-system description object
 *
 * This function runs commit and returns zero in case of success and a negative
 * error code in case of failure.
 */
int ubifs_run_commit(struct ubifs_info *c)
{
	int err = 0;

	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_BROKEN) {
		err = -EINVAL;
		goto out;
	}

	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
		/*
		 * We set the commit state to 'running required' to indicate
		 * that we want it to complete as quickly as possible.
		 */
		c->cmt_state = COMMIT_RUNNING_REQUIRED;

	if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}
	spin_unlock(&c->cs_lock);

	/* Ok, the commit is indeed needed */

	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);
	/*
	 * Since we unlocked 'c->cs_lock', the state may have changed, so
	 * re-check it.
	 */
	if (c->cmt_state == COMMIT_BROKEN) {
		err = -EINVAL;
		goto out_cmt_unlock;
	}

	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
		c->cmt_state = COMMIT_RUNNING_REQUIRED;

	if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		up_write(&c->commit_sem);
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}
	c->cmt_state = COMMIT_RUNNING_REQUIRED;
	spin_unlock(&c->cs_lock);

	err = do_commit(c);
	return err;

out_cmt_unlock:
	up_write(&c->commit_sem);
out:
	spin_unlock(&c->cs_lock);
	return err;
}

/**
 * ubifs_gc_should_commit - determine if it is time for GC to run commit.
 * @c: UBIFS file-system description object
 *
 * This function is called by garbage collection to determine if commit should
 * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal
 * is full enough to start commit, this function returns true. It is not
 * absolutely necessary to commit yet, but it feels like this should be better
 * then to keep doing GC. This function returns %1 if GC has to initiate commit
 * and %0 if not.
 */
int ubifs_gc_should_commit(struct ubifs_info *c)
{
	int ret = 0;

	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_BACKGROUND) {
		dbg_cmt("commit required now");
		c->cmt_state = COMMIT_REQUIRED;
	} else
		dbg_cmt("commit not requested");
	if (c->cmt_state == COMMIT_REQUIRED)
		ret = 1;
	spin_unlock(&c->cs_lock);
	return ret;
}

#ifdef CONFIG_UBIFS_FS_DEBUG

/**
 * struct idx_node - hold index nodes during index tree traversal.
 * @list: list
 * @iip: index in parent (slot number of this indexing node in the parent
 *       indexing node)
 * @upper_key: all keys in this indexing node have to be less or equivalent to
 *             this key
 * @idx: index node (8-byte aligned because all node structures must be 8-byte
 *       aligned)
 */
struct idx_node {
	struct list_head list;
	int iip;
	union ubifs_key upper_key;
	struct ubifs_idx_node idx __attribute__((aligned(8)));
};

/**
 * dbg_old_index_check_init - get information for the next old index check.
 * @c: UBIFS file-system description object
 * @zroot: root of the index
 *
 * This function records information about the index that will be needed for the
 * next old index check i.e. 'dbg_check_old_index()'.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
	struct ubifs_idx_node *idx;
	int lnum, offs, len, err = 0;

	c->old_zroot = *zroot;

	lnum = c->old_zroot.lnum;
	offs = c->old_zroot.offs;
	len = c->old_zroot.len;

	idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
	if (!idx)
		return -ENOMEM;

	err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
	if (err)
		goto out;

	c->old_zroot_level = le16_to_cpu(idx->level);
	c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
out:
	kfree(idx);
	return err;
}

/**
 * dbg_check_old_index - check the old copy of the index.
 * @c: UBIFS file-system description object
 * @zroot: root of the new index
 *
 * In order to be able to recover from an unclean unmount, a complete copy of
 * the index must exist on flash. This is the "old" index. The commit process
 * must write the "new" index to flash without overwriting or destroying any
 * part of the old index. This function is run at commit end in order to check
 * that the old index does indeed exist completely intact.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
	int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
	int first = 1, iip;
	union ubifs_key lower_key, upper_key, l_key, u_key;
	unsigned long long uninitialized_var(last_sqnum);
	struct ubifs_idx_node *idx;
	struct list_head list;
	struct idx_node *i;
	size_t sz;

	if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
		goto out;

	INIT_LIST_HEAD(&list);

	sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
	     UBIFS_IDX_NODE_SZ;

	/* Start at the old zroot */
	lnum = c->old_zroot.lnum;
	offs = c->old_zroot.offs;
	len = c->old_zroot.len;
	iip = 0;

	/*
	 * Traverse the index tree preorder depth-first i.e. do a node and then
	 * its subtrees from left to right.
	 */
	while (1) {
		struct ubifs_branch *br;

		/* Get the next index node */
		i = kmalloc(sz, GFP_NOFS);
		if (!i) {
			err = -ENOMEM;
			goto out_free;
		}
		i->iip = iip;
		/* Keep the index nodes on our path in a linked list */
		list_add_tail(&i->list, &list);
		/* Read the index node */
		idx = &i->idx;
		err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
		if (err)
			goto out_free;
		/* Validate index node */
		child_cnt = le16_to_cpu(idx->child_cnt);
		if (child_cnt < 1 || child_cnt > c->fanout) {
			err = 1;
			goto out_dump;
		}
		if (first) {
			first = 0;
			/* Check root level and sqnum */
			if (le16_to_cpu(idx->level) != c->old_zroot_level) {
				err = 2;
				goto out_dump;
			}
			if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
				err = 3;
				goto out_dump;
			}
			/* Set last values as though root had a parent */
			last_level = le16_to_cpu(idx->level) + 1;
			last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
			key_read(c, ubifs_idx_key(c, idx), &lower_key);
			highest_ino_key(c, &upper_key, INUM_WATERMARK);
		}
		key_copy(c, &upper_key, &i->upper_key);
		if (le16_to_cpu(idx->level) != last_level - 1) {
			err = 3;
			goto out_dump;
		}
		/*
		 * The index is always written bottom up hence a child's sqnum
		 * is always less than the parents.
		 */
		if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
			err = 4;
			goto out_dump;
		}
		/* Check key range */
		key_read(c, ubifs_idx_key(c, idx), &l_key);
		br = ubifs_idx_branch(c, idx, child_cnt - 1);
		key_read(c, &br->key, &u_key);
		if (keys_cmp(c, &lower_key, &l_key) > 0) {
			err = 5;
			goto out_dump;
		}
		if (keys_cmp(c, &upper_key, &u_key) < 0) {
			err = 6;
			goto out_dump;
		}
		if (keys_cmp(c, &upper_key, &u_key) == 0)
			if (!is_hash_key(c, &u_key)) {
				err = 7;
				goto out_dump;
			}
		/* Go to next index node */
		if (le16_to_cpu(idx->level) == 0) {
			/* At the bottom, so go up until can go right */
			while (1) {
				/* Drop the bottom of the list */
				list_del(&i->list);
				kfree(i);
				/* No more list means we are done */
				if (list_empty(&list))
					goto out;
				/* Look at the new bottom */
				i = list_entry(list.prev, struct idx_node,
					       list);
				idx = &i->idx;
				/* Can we go right */
				if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
					iip = iip + 1;
					break;
				} else
					/* Nope, so go up again */
					iip = i->iip;
			}
		} else
			/* Go down left */
			iip = 0;
		/*
		 * We have the parent in 'idx' and now we set up for reading the
		 * child pointed to by slot 'iip'.
		 */
		last_level = le16_to_cpu(idx->level);
		last_sqnum = le64_to_cpu(idx->ch.sqnum);
		br = ubifs_idx_branch(c, idx, iip);
		lnum = le32_to_cpu(br->lnum);
		offs = le32_to_cpu(br->offs);
		len = le32_to_cpu(br->len);
		key_read(c, &br->key, &lower_key);
		if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
			br = ubifs_idx_branch(c, idx, iip + 1);
			key_read(c, &br->key, &upper_key);
		} else
			key_copy(c, &i->upper_key, &upper_key);
	}
out:
	err = dbg_old_index_check_init(c, zroot);
	if (err)
		goto out_free;

	return 0;

out_dump:
	dbg_err("dumping index node (iip=%d)", i->iip);
	dbg_dump_node(c, idx);
	list_del(&i->list);
	kfree(i);
	if (!list_empty(&list)) {
		i = list_entry(list.prev, struct idx_node, list);
		dbg_err("dumping parent index node");
		dbg_dump_node(c, &i->idx);
	}
out_free:
	while (!list_empty(&list)) {
		i = list_entry(list.next, struct idx_node, list);
		list_del(&i->list);
		kfree(i);
	}
	ubifs_err("failed, error %d", err);
	if (err > 0)
		err = -EINVAL;
	return err;
}

#endif /* CONFIG_UBIFS_FS_DEBUG */
9' href='#n2589'>2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688
/*
 * Copyright (C) 2001,2002,2003,2004 Broadcom Corporation
 * Copyright (c) 2006, 2007  Maciej W. Rozycki
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 *
 * This driver is designed for the Broadcom SiByte SOC built-in
 * Ethernet controllers. Written by Mitch Lichtenberg at Broadcom Corp.
 *
 * Updated to the driver model and the PHY abstraction layer
 * by Maciej W. Rozycki.
 */

#include <linux/bug.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/phy.h>
#include <linux/platform_device.h>

#include <asm/cache.h>
#include <asm/io.h>
#include <asm/processor.h>	/* Processor type for cache alignment. */

/* Operational parameters that usually are not changed. */

#define CONFIG_SBMAC_COALESCE

/* Time in jiffies before concluding the transmitter is hung. */
#define TX_TIMEOUT  (2*HZ)


MODULE_AUTHOR("Mitch Lichtenberg (Broadcom Corp.)");
MODULE_DESCRIPTION("Broadcom SiByte SOC GB Ethernet driver");

/* A few user-configurable values which may be modified when a driver
   module is loaded. */

/* 1 normal messages, 0 quiet .. 7 verbose. */
static int debug = 1;
module_param(debug, int, S_IRUGO);
MODULE_PARM_DESC(debug, "Debug messages");

#ifdef CONFIG_SBMAC_COALESCE
static int int_pktcnt_tx = 255;
module_param(int_pktcnt_tx, int, S_IRUGO);
MODULE_PARM_DESC(int_pktcnt_tx, "TX packet count");

static int int_timeout_tx = 255;
module_param(int_timeout_tx, int, S_IRUGO);
MODULE_PARM_DESC(int_timeout_tx, "TX timeout value");

static int int_pktcnt_rx = 64;
module_param(int_pktcnt_rx, int, S_IRUGO);
MODULE_PARM_DESC(int_pktcnt_rx, "RX packet count");

static int int_timeout_rx = 64;
module_param(int_timeout_rx, int, S_IRUGO);
MODULE_PARM_DESC(int_timeout_rx, "RX timeout value");
#endif

#include <asm/sibyte/board.h>
#include <asm/sibyte/sb1250.h>
#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
#include <asm/sibyte/bcm1480_regs.h>
#include <asm/sibyte/bcm1480_int.h>
#define R_MAC_DMA_OODPKTLOST_RX	R_MAC_DMA_OODPKTLOST
#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_int.h>
#else
#error invalid SiByte MAC configuation
#endif
#include <asm/sibyte/sb1250_scd.h>
#include <asm/sibyte/sb1250_mac.h>
#include <asm/sibyte/sb1250_dma.h>

#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
#define UNIT_INT(n)		(K_BCM1480_INT_MAC_0 + ((n) * 2))
#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
#define UNIT_INT(n)		(K_INT_MAC_0 + (n))
#else
#error invalid SiByte MAC configuation
#endif

#ifdef K_INT_PHY
#define SBMAC_PHY_INT			K_INT_PHY
#else
#define SBMAC_PHY_INT			PHY_POLL
#endif

/**********************************************************************
 *  Simple types
 ********************************************************************* */

enum sbmac_speed {
	sbmac_speed_none = 0,
	sbmac_speed_10 = SPEED_10,
	sbmac_speed_100 = SPEED_100,
	sbmac_speed_1000 = SPEED_1000,
};

enum sbmac_duplex {
	sbmac_duplex_none = -1,
	sbmac_duplex_half = DUPLEX_HALF,
	sbmac_duplex_full = DUPLEX_FULL,
};

enum sbmac_fc {
	sbmac_fc_none,
	sbmac_fc_disabled,
	sbmac_fc_frame,
	sbmac_fc_collision,
	sbmac_fc_carrier,
};

enum sbmac_state {
	sbmac_state_uninit,
	sbmac_state_off,
	sbmac_state_on,
	sbmac_state_broken,
};


/**********************************************************************
 *  Macros
 ********************************************************************* */


#define SBDMA_NEXTBUF(d,f) ((((d)->f+1) == (d)->sbdma_dscrtable_end) ? \
			  (d)->sbdma_dscrtable : (d)->f+1)


#define NUMCACHEBLKS(x) (((x)+SMP_CACHE_BYTES-1)/SMP_CACHE_BYTES)

#define SBMAC_MAX_TXDESCR	256
#define SBMAC_MAX_RXDESCR	256

#define ETHER_ADDR_LEN		6
#define ENET_PACKET_SIZE	1518
/*#define ENET_PACKET_SIZE	9216 */

/**********************************************************************
 *  DMA Descriptor structure
 ********************************************************************* */

struct sbdmadscr {
	uint64_t  dscr_a;
	uint64_t  dscr_b;
};

/**********************************************************************
 *  DMA Controller structure
 ********************************************************************* */

struct sbmacdma {

	/*
	 * This stuff is used to identify the channel and the registers
	 * associated with it.
	 */
	struct sbmac_softc	*sbdma_eth;	/* back pointer to associated
						   MAC */
	int			sbdma_channel;	/* channel number */
	int			sbdma_txdir;	/* direction (1=transmit) */
	int			sbdma_maxdescr;	/* total # of descriptors
						   in ring */
#ifdef CONFIG_SBMAC_COALESCE
	int			sbdma_int_pktcnt;
						/* # descriptors rx/tx
						   before interrupt */
	int			sbdma_int_timeout;
						/* # usec rx/tx interrupt */
#endif
	void __iomem		*sbdma_config0;	/* DMA config register 0 */
	void __iomem		*sbdma_config1;	/* DMA config register 1 */
	void __iomem		*sbdma_dscrbase;
						/* descriptor base address */
	void __iomem		*sbdma_dscrcnt;	/* descriptor count register */
	void __iomem		*sbdma_curdscr;	/* current descriptor
						   address */
	void __iomem		*sbdma_oodpktlost;
						/* pkt drop (rx only) */

	/*
	 * This stuff is for maintenance of the ring
	 */
	void			*sbdma_dscrtable_unaligned;
	struct sbdmadscr	*sbdma_dscrtable;
						/* base of descriptor table */
	struct sbdmadscr	*sbdma_dscrtable_end;
						/* end of descriptor table */
	struct sk_buff		**sbdma_ctxtable;
						/* context table, one
						   per descr */
	dma_addr_t		sbdma_dscrtable_phys;
						/* and also the phys addr */
	struct sbdmadscr	*sbdma_addptr;	/* next dscr for sw to add */
	struct sbdmadscr	*sbdma_remptr;	/* next dscr for sw
						   to remove */
};


/**********************************************************************
 *  Ethernet softc structure
 ********************************************************************* */

struct sbmac_softc {

	/*
	 * Linux-specific things
	 */
	struct net_device	*sbm_dev;	/* pointer to linux device */
	struct napi_struct	napi;
	struct phy_device	*phy_dev;	/* the associated PHY device */
	struct mii_bus		*mii_bus;	/* the MII bus */
	int			phy_irq[PHY_MAX_ADDR];
	spinlock_t		sbm_lock;	/* spin lock */
	int			sbm_devflags;	/* current device flags */

	/*
	 * Controller-specific things
	 */
	void __iomem		*sbm_base;	/* MAC's base address */
	enum sbmac_state	sbm_state;	/* current state */

	void __iomem		*sbm_macenable;	/* MAC Enable Register */
	void __iomem		*sbm_maccfg;	/* MAC Config Register */
	void __iomem		*sbm_fifocfg;	/* FIFO Config Register */
	void __iomem		*sbm_framecfg;	/* Frame Config Register */
	void __iomem		*sbm_rxfilter;	/* Receive Filter Register */
	void __iomem		*sbm_isr;	/* Interrupt Status Register */
	void __iomem		*sbm_imr;	/* Interrupt Mask Register */
	void __iomem		*sbm_mdio;	/* MDIO Register */

	enum sbmac_speed	sbm_speed;	/* current speed */
	enum sbmac_duplex	sbm_duplex;	/* current duplex */
	enum sbmac_fc		sbm_fc;		/* cur. flow control setting */
	int			sbm_pause;	/* current pause setting */
	int			sbm_link;	/* current link state */

	unsigned char		sbm_hwaddr[ETHER_ADDR_LEN];

	struct sbmacdma		sbm_txdma;	/* only channel 0 for now */
	struct sbmacdma		sbm_rxdma;
	int			rx_hw_checksum;
	int			sbe_idx;
};


/**********************************************************************
 *  Externs
 ********************************************************************* */

/**********************************************************************
 *  Prototypes
 ********************************************************************* */

static void sbdma_initctx(struct sbmacdma *d, struct sbmac_softc *s, int chan,
			  int txrx, int maxdescr);
static void sbdma_channel_start(struct sbmacdma *d, int rxtx);
static int sbdma_add_rcvbuffer(struct sbmac_softc *sc, struct sbmacdma *d,
			       struct sk_buff *m);
static int sbdma_add_txbuffer(struct sbmacdma *d, struct sk_buff *m);
static void sbdma_emptyring(struct sbmacdma *d);
static void sbdma_fillring(struct sbmac_softc *sc, struct sbmacdma *d);
static int sbdma_rx_process(struct sbmac_softc *sc, struct sbmacdma *d,
			    int work_to_do, int poll);
static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d,
			     int poll);
static int sbmac_initctx(struct sbmac_softc *s);
static void sbmac_channel_start(struct sbmac_softc *s);
static void sbmac_channel_stop(struct sbmac_softc *s);
static enum sbmac_state sbmac_set_channel_state(struct sbmac_softc *,
						enum sbmac_state);
static void sbmac_promiscuous_mode(struct sbmac_softc *sc, int onoff);
static uint64_t sbmac_addr2reg(unsigned char *ptr);
static irqreturn_t sbmac_intr(int irq, void *dev_instance);
static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev);
static void sbmac_setmulti(struct sbmac_softc *sc);
static int sbmac_init(struct platform_device *pldev, long long base);
static int sbmac_set_speed(struct sbmac_softc *s, enum sbmac_speed speed);
static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex,
			    enum sbmac_fc fc);

static int sbmac_open(struct net_device *dev);
static void sbmac_tx_timeout (struct net_device *dev);
static void sbmac_set_rx_mode(struct net_device *dev);
static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static int sbmac_close(struct net_device *dev);
static int sbmac_poll(struct napi_struct *napi, int budget);

static void sbmac_mii_poll(struct net_device *dev);
static int sbmac_mii_probe(struct net_device *dev);

static void sbmac_mii_sync(void __iomem *sbm_mdio);
static void sbmac_mii_senddata(void __iomem *sbm_mdio, unsigned int data,
			       int bitcnt);
static int sbmac_mii_read(struct mii_bus *bus, int phyaddr, int regidx);
static int sbmac_mii_write(struct mii_bus *bus, int phyaddr, int regidx,
			   u16 val);


/**********************************************************************
 *  Globals
 ********************************************************************* */

static char sbmac_string[] = "sb1250-mac";

static char sbmac_mdio_string[] = "sb1250-mac-mdio";


/**********************************************************************
 *  MDIO constants
 ********************************************************************* */

#define	MII_COMMAND_START	0x01
#define	MII_COMMAND_READ	0x02
#define	MII_COMMAND_WRITE	0x01
#define	MII_COMMAND_ACK		0x02

#define M_MAC_MDIO_DIR_OUTPUT	0		/* for clarity */

#define ENABLE 		1
#define DISABLE		0

/**********************************************************************
 *  SBMAC_MII_SYNC(sbm_mdio)
 *
 *  Synchronize with the MII - send a pattern of bits to the MII
 *  that will guarantee that it is ready to accept a command.
 *
 *  Input parameters:
 *  	   sbm_mdio - address of the MAC's MDIO register
 *
 *  Return value:
 *  	   nothing
 ********************************************************************* */

static void sbmac_mii_sync(void __iomem *sbm_mdio)
{
	int cnt;
	uint64_t bits;
	int mac_mdio_genc;

	mac_mdio_genc = __raw_readq(sbm_mdio) & M_MAC_GENC;

	bits = M_MAC_MDIO_DIR_OUTPUT | M_MAC_MDIO_OUT;

	__raw_writeq(bits | mac_mdio_genc, sbm_mdio);

	for (cnt = 0; cnt < 32; cnt++) {
		__raw_writeq(bits | M_MAC_MDC | mac_mdio_genc, sbm_mdio);
		__raw_writeq(bits | mac_mdio_genc, sbm_mdio);
	}
}

/**********************************************************************
 *  SBMAC_MII_SENDDATA(sbm_mdio, data, bitcnt)
 *
 *  Send some bits to the MII.  The bits to be sent are right-
 *  justified in the 'data' parameter.
 *
 *  Input parameters:
 *  	   sbm_mdio - address of the MAC's MDIO register
 *  	   data     - data to send
 *  	   bitcnt   - number of bits to send
 ********************************************************************* */

static void sbmac_mii_senddata(void __iomem *sbm_mdio, unsigned int data,
			       int bitcnt)
{
	int i;
	uint64_t bits;
	unsigned int curmask;
	int mac_mdio_genc;

	mac_mdio_genc = __raw_readq(sbm_mdio) & M_MAC_GENC;

	bits = M_MAC_MDIO_DIR_OUTPUT;
	__raw_writeq(bits | mac_mdio_genc, sbm_mdio);

	curmask = 1 << (bitcnt - 1);

	for (i = 0; i < bitcnt; i++) {
		if (data & curmask)
			bits |= M_MAC_MDIO_OUT;
		else bits &= ~M_MAC_MDIO_OUT;
		__raw_writeq(bits | mac_mdio_genc, sbm_mdio);
		__raw_writeq(bits | M_MAC_MDC | mac_mdio_genc, sbm_mdio);
		__raw_writeq(bits | mac_mdio_genc, sbm_mdio);
		curmask >>= 1;
	}
}



/**********************************************************************
 *  SBMAC_MII_READ(bus, phyaddr, regidx)
 *  Read a PHY register.
 *
 *  Input parameters:
 *  	   bus     - MDIO bus handle
 *  	   phyaddr - PHY's address
 *  	   regnum  - index of register to read
 *
 *  Return value:
 *  	   value read, or 0xffff if an error occurred.
 ********************************************************************* */

static int sbmac_mii_read(struct mii_bus *bus, int phyaddr, int regidx)
{
	struct sbmac_softc *sc = (struct sbmac_softc *)bus->priv;
	void __iomem *sbm_mdio = sc->sbm_mdio;
	int idx;
	int error;
	int regval;
	int mac_mdio_genc;

	/*
	 * Synchronize ourselves so that the PHY knows the next
	 * thing coming down is a command
	 */
	sbmac_mii_sync(sbm_mdio);

	/*
	 * Send the data to the PHY.  The sequence is
	 * a "start" command (2 bits)
	 * a "read" command (2 bits)
	 * the PHY addr (5 bits)
	 * the register index (5 bits)
	 */
	sbmac_mii_senddata(sbm_mdio, MII_COMMAND_START, 2);
	sbmac_mii_senddata(sbm_mdio, MII_COMMAND_READ, 2);
	sbmac_mii_senddata(sbm_mdio, phyaddr, 5);
	sbmac_mii_senddata(sbm_mdio, regidx, 5);

	mac_mdio_genc = __raw_readq(sbm_mdio) & M_MAC_GENC;

	/*
	 * Switch the port around without a clock transition.
	 */
	__raw_writeq(M_MAC_MDIO_DIR_INPUT | mac_mdio_genc, sbm_mdio);

	/*
	 * Send out a clock pulse to signal we want the status
	 */
	__raw_writeq(M_MAC_MDIO_DIR_INPUT | M_MAC_MDC | mac_mdio_genc,
		     sbm_mdio);
	__raw_writeq(M_MAC_MDIO_DIR_INPUT | mac_mdio_genc, sbm_mdio);

	/*
	 * If an error occurred, the PHY will signal '1' back
	 */
	error = __raw_readq(sbm_mdio) & M_MAC_MDIO_IN;

	/*
	 * Issue an 'idle' clock pulse, but keep the direction
	 * the same.
	 */
	__raw_writeq(M_MAC_MDIO_DIR_INPUT | M_MAC_MDC | mac_mdio_genc,
		     sbm_mdio);
	__raw_writeq(M_MAC_MDIO_DIR_INPUT | mac_mdio_genc, sbm_mdio);

	regval = 0;

	for (idx = 0; idx < 16; idx++) {
		regval <<= 1;

		if (error == 0) {
			if (__raw_readq(sbm_mdio) & M_MAC_MDIO_IN)
				regval |= 1;
		}

		__raw_writeq(M_MAC_MDIO_DIR_INPUT | M_MAC_MDC | mac_mdio_genc,
			     sbm_mdio);
		__raw_writeq(M_MAC_MDIO_DIR_INPUT | mac_mdio_genc, sbm_mdio);
	}

	/* Switch back to output */
	__raw_writeq(M_MAC_MDIO_DIR_OUTPUT | mac_mdio_genc, sbm_mdio);

	if (error == 0)
		return regval;
	return 0xffff;
}


/**********************************************************************
 *  SBMAC_MII_WRITE(bus, phyaddr, regidx, regval)
 *
 *  Write a value to a PHY register.
 *
 *  Input parameters:
 *  	   bus     - MDIO bus handle
 *  	   phyaddr - PHY to use
 *  	   regidx  - register within the PHY
 *  	   regval  - data to write to register
 *
 *  Return value:
 *  	   0 for success
 ********************************************************************* */

static int sbmac_mii_write(struct mii_bus *bus, int phyaddr, int regidx,
			   u16 regval)
{
	struct sbmac_softc *sc = (struct sbmac_softc *)bus->priv;
	void __iomem *sbm_mdio = sc->sbm_mdio;
	int mac_mdio_genc;

	sbmac_mii_sync(sbm_mdio);

	sbmac_mii_senddata(sbm_mdio, MII_COMMAND_START, 2);
	sbmac_mii_senddata(sbm_mdio, MII_COMMAND_WRITE, 2);
	sbmac_mii_senddata(sbm_mdio, phyaddr, 5);
	sbmac_mii_senddata(sbm_mdio, regidx, 5);
	sbmac_mii_senddata(sbm_mdio, MII_COMMAND_ACK, 2);
	sbmac_mii_senddata(sbm_mdio, regval, 16);

	mac_mdio_genc = __raw_readq(sbm_mdio) & M_MAC_GENC;

	__raw_writeq(M_MAC_MDIO_DIR_OUTPUT | mac_mdio_genc, sbm_mdio);

	return 0;
}



/**********************************************************************
 *  SBDMA_INITCTX(d,s,chan,txrx,maxdescr)
 *
 *  Initialize a DMA channel context.  Since there are potentially
 *  eight DMA channels per MAC, it's nice to do this in a standard
 *  way.
 *
 *  Input parameters:
 *  	   d - struct sbmacdma (DMA channel context)
 *  	   s - struct sbmac_softc (pointer to a MAC)
 *  	   chan - channel number (0..1 right now)
 *  	   txrx - Identifies DMA_TX or DMA_RX for channel direction
 *      maxdescr - number of descriptors
 *
 *  Return value:
 *  	   nothing
 ********************************************************************* */

static void sbdma_initctx(struct sbmacdma *d, struct sbmac_softc *s, int chan,
			  int txrx, int maxdescr)
{
#ifdef CONFIG_SBMAC_COALESCE
	int int_pktcnt, int_timeout;
#endif

	/*
	 * Save away interesting stuff in the structure
	 */

	d->sbdma_eth       = s;
	d->sbdma_channel   = chan;
	d->sbdma_txdir     = txrx;

#if 0
	/* RMON clearing */
	s->sbe_idx =(s->sbm_base - A_MAC_BASE_0)/MAC_SPACING;
#endif

	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_BYTES);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_COLLISIONS);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_LATE_COL);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_EX_COL);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_FCS_ERROR);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_ABORT);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_BAD);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_GOOD);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_RUNT);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_TX_OVERSIZE);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_BYTES);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_MCAST);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_BCAST);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_BAD);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_GOOD);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_RUNT);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_OVERSIZE);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_FCS_ERROR);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_LENGTH_ERROR);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_CODE_ERROR);
	__raw_writeq(0, s->sbm_base + R_MAC_RMON_RX_ALIGN_ERROR);

	/*
	 * initialize register pointers
	 */

	d->sbdma_config0 =
		s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_CONFIG0);
	d->sbdma_config1 =
		s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_CONFIG1);
	d->sbdma_dscrbase =
		s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_DSCR_BASE);
	d->sbdma_dscrcnt =
		s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_DSCR_CNT);
	d->sbdma_curdscr =
		s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_CUR_DSCRADDR);
	if (d->sbdma_txdir)
		d->sbdma_oodpktlost = NULL;
	else
		d->sbdma_oodpktlost =
			s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_OODPKTLOST_RX);

	/*
	 * Allocate memory for the ring
	 */

	d->sbdma_maxdescr = maxdescr;

	d->sbdma_dscrtable_unaligned = kcalloc(d->sbdma_maxdescr + 1,
					       sizeof(*d->sbdma_dscrtable),
					       GFP_KERNEL);

	/*
	 * The descriptor table must be aligned to at least 16 bytes or the
	 * MAC will corrupt it.
	 */
	d->sbdma_dscrtable = (struct sbdmadscr *)
			     ALIGN((unsigned long)d->sbdma_dscrtable_unaligned,
				   sizeof(*d->sbdma_dscrtable));

	d->sbdma_dscrtable_end = d->sbdma_dscrtable + d->sbdma_maxdescr;

	d->sbdma_dscrtable_phys = virt_to_phys(d->sbdma_dscrtable);

	/*
	 * And context table
	 */

	d->sbdma_ctxtable = kcalloc(d->sbdma_maxdescr,
				    sizeof(*d->sbdma_ctxtable), GFP_KERNEL);

#ifdef CONFIG_SBMAC_COALESCE
	/*
	 * Setup Rx/Tx DMA coalescing defaults
	 */

	int_pktcnt = (txrx == DMA_TX) ? int_pktcnt_tx : int_pktcnt_rx;
	if ( int_pktcnt ) {
		d->sbdma_int_pktcnt = int_pktcnt;
	} else {
		d->sbdma_int_pktcnt = 1;
	}

	int_timeout = (txrx == DMA_TX) ? int_timeout_tx : int_timeout_rx;
	if ( int_timeout ) {
		d->sbdma_int_timeout = int_timeout;
	} else {
		d->sbdma_int_timeout = 0;
	}
#endif

}

/**********************************************************************
 *  SBDMA_CHANNEL_START(d)
 *
 *  Initialize the hardware registers for a DMA channel.
 *
 *  Input parameters:
 *  	   d - DMA channel to init (context must be previously init'd
 *         rxtx - DMA_RX or DMA_TX depending on what type of channel
 *
 *  Return value:
 *  	   nothing
 ********************************************************************* */

static void sbdma_channel_start(struct sbmacdma *d, int rxtx)
{
	/*
	 * Turn on the DMA channel
	 */

#ifdef CONFIG_SBMAC_COALESCE
	__raw_writeq(V_DMA_INT_TIMEOUT(d->sbdma_int_timeout) |
		       0, d->sbdma_config1);
	__raw_writeq(M_DMA_EOP_INT_EN |
		       V_DMA_RINGSZ(d->sbdma_maxdescr) |
		       V_DMA_INT_PKTCNT(d->sbdma_int_pktcnt) |
		       0, d->sbdma_config0);
#else
	__raw_writeq(0, d->sbdma_config1);
	__raw_writeq(V_DMA_RINGSZ(d->sbdma_maxdescr) |
		       0, d->sbdma_config0);
#endif

	__raw_writeq(d->sbdma_dscrtable_phys, d->sbdma_dscrbase);

	/*
	 * Initialize ring pointers
	 */

	d->sbdma_addptr = d->sbdma_dscrtable;
	d->sbdma_remptr = d->sbdma_dscrtable;
}

/**********************************************************************
 *  SBDMA_CHANNEL_STOP(d)
 *
 *  Initialize the hardware registers for a DMA channel.
 *
 *  Input parameters:
 *  	   d - DMA channel to init (context must be previously init'd
 *
 *  Return value:
 *  	   nothing
 ********************************************************************* */

static void sbdma_channel_stop(struct sbmacdma *d)
{
	/*
	 * Turn off the DMA channel
	 */

	__raw_writeq(0, d->sbdma_config1);

	__raw_writeq(0, d->sbdma_dscrbase);

	__raw_writeq(0, d->sbdma_config0);

	/*
	 * Zero ring pointers
	 */

	d->sbdma_addptr = NULL;
	d->sbdma_remptr = NULL;
}

static inline void sbdma_align_skb(struct sk_buff *skb,
				   unsigned int power2, unsigned int offset)
{
	unsigned char *addr = skb->data;
	unsigned char *newaddr = PTR_ALIGN(addr, power2);

	skb_reserve(skb, newaddr - addr + offset);
}


/**********************************************************************
 *  SBDMA_ADD_RCVBUFFER(d,sb)
 *
 *  Add a buffer to the specified DMA channel.   For receive channels,
 *  this queues a buffer for inbound packets.
 *
 *  Input parameters:
 *	   sc - softc structure
 *  	    d - DMA channel descriptor
 * 	   sb - sk_buff to add, or NULL if we should allocate one
 *
 *  Return value:
 *  	   0 if buffer could not be added (ring is full)
 *  	   1 if buffer added successfully
 ********************************************************************* */


static int sbdma_add_rcvbuffer(struct sbmac_softc *sc, struct sbmacdma *d,
			       struct sk_buff *sb)
{
	struct net_device *dev = sc->sbm_dev;
	struct sbdmadscr *dsc;
	struct sbdmadscr *nextdsc;
	struct sk_buff *sb_new = NULL;
	int pktsize = ENET_PACKET_SIZE;

	/* get pointer to our current place in the ring */

	dsc = d->sbdma_addptr;
	nextdsc = SBDMA_NEXTBUF(d,sbdma_addptr);

	/*
	 * figure out if the ring is full - if the next descriptor
	 * is the same as the one that we're going to remove from
	 * the ring, the ring is full
	 */

	if (nextdsc == d->sbdma_remptr) {
		return -ENOSPC;
	}

	/*
	 * Allocate a sk_buff if we don't already have one.
	 * If we do have an sk_buff, reset it so that it's empty.
	 *
	 * Note: sk_buffs don't seem to be guaranteed to have any sort
	 * of alignment when they are allocated.  Therefore, allocate enough
	 * extra space to make sure that:
	 *
	 *    1. the data does not start in the middle of a cache line.
	 *    2. The data does not end in the middle of a cache line
	 *    3. The buffer can be aligned such that the IP addresses are
	 *       naturally aligned.
	 *
	 *  Remember, the SOCs MAC writes whole cache lines at a time,
	 *  without reading the old contents first.  So, if the sk_buff's