aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Makefile5
-rw-r--r--drivers/md/md.c256
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/raid5.c43
-rw-r--r--include/linux/raid/pq.h4
-rw-r--r--lib/raid6/Makefile9
-rw-r--r--lib/raid6/algos.c12
-rw-r--r--lib/raid6/altivec.uc3
-rw-r--r--lib/raid6/avx2.c251
-rw-r--r--lib/raid6/mmx.c2
-rw-r--r--lib/raid6/recov_avx2.c323
-rw-r--r--lib/raid6/recov_ssse3.c4
-rw-r--r--lib/raid6/sse1.c2
-rw-r--r--lib/raid6/sse2.c8
-rw-r--r--lib/raid6/test/Makefile29
-rw-r--r--lib/raid6/x86.h14
16 files changed, 809 insertions, 158 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 05afcca66de..e71fc4279aa 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -123,9 +123,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
123# does binutils support specific instructions? 123# does binutils support specific instructions?
124asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) 124asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
125avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) 125avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
126avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
126 127
127KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) 128KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
128KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) 129KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
129 130
130LDFLAGS := -m elf_$(UTS_MACHINE) 131LDFLAGS := -m elf_$(UTS_MACHINE)
131 132
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4843b004c55..3db3d1b271f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1414,12 +1414,11 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1414 unsigned long long newcsum; 1414 unsigned long long newcsum;
1415 int size = 256 + le32_to_cpu(sb->max_dev)*2; 1415 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1416 __le32 *isuper = (__le32*)sb; 1416 __le32 *isuper = (__le32*)sb;
1417 int i;
1418 1417
1419 disk_csum = sb->sb_csum; 1418 disk_csum = sb->sb_csum;
1420 sb->sb_csum = 0; 1419 sb->sb_csum = 0;
1421 newcsum = 0; 1420 newcsum = 0;
1422 for (i=0; size>=4; size -= 4 ) 1421 for (; size >= 4; size -= 4)
1423 newcsum += le32_to_cpu(*isuper++); 1422 newcsum += le32_to_cpu(*isuper++);
1424 1423
1425 if (size == 2) 1424 if (size == 2)
@@ -4753,6 +4752,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
4753 } 4752 }
4754 mddev_get(mddev); 4753 mddev_get(mddev);
4755 spin_unlock(&all_mddevs_lock); 4754 spin_unlock(&all_mddevs_lock);
4755 if (entry->store == new_dev_store)
4756 flush_workqueue(md_misc_wq);
4756 rv = mddev_lock(mddev); 4757 rv = mddev_lock(mddev);
4757 if (!rv) { 4758 if (!rv) {
4758 rv = entry->store(mddev, page, length); 4759 rv = entry->store(mddev, page, length);
@@ -6346,24 +6347,23 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6346 * Commands dealing with the RAID driver but not any 6347 * Commands dealing with the RAID driver but not any
6347 * particular array: 6348 * particular array:
6348 */ 6349 */
6349 switch (cmd) 6350 switch (cmd) {
6350 { 6351 case RAID_VERSION:
6351 case RAID_VERSION: 6352 err = get_version(argp);
6352 err = get_version(argp); 6353 goto done;
6353 goto done;
6354 6354
6355 case PRINT_RAID_DEBUG: 6355 case PRINT_RAID_DEBUG:
6356 err = 0; 6356 err = 0;
6357 md_print_devices(); 6357 md_print_devices();
6358 goto done; 6358 goto done;
6359 6359
6360#ifndef MODULE 6360#ifndef MODULE
6361 case RAID_AUTORUN: 6361 case RAID_AUTORUN:
6362 err = 0; 6362 err = 0;
6363 autostart_arrays(arg); 6363 autostart_arrays(arg);
6364 goto done; 6364 goto done;
6365#endif 6365#endif
6366 default:; 6366 default:;
6367 } 6367 }
6368 6368
6369 /* 6369 /*
@@ -6398,6 +6398,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6398 goto abort; 6398 goto abort;
6399 } 6399 }
6400 6400
6401 if (cmd == ADD_NEW_DISK)
6402 /* need to ensure md_delayed_delete() has completed */
6403 flush_workqueue(md_misc_wq);
6404
6401 err = mddev_lock(mddev); 6405 err = mddev_lock(mddev);
6402 if (err) { 6406 if (err) {
6403 printk(KERN_INFO 6407 printk(KERN_INFO
@@ -6406,50 +6410,44 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6406 goto abort; 6410 goto abort;
6407 } 6411 }
6408 6412
6409 switch (cmd) 6413 if (cmd == SET_ARRAY_INFO) {
6410 { 6414 mdu_array_info_t info;
6411 case SET_ARRAY_INFO: 6415 if (!arg)
6412 { 6416 memset(&info, 0, sizeof(info));
6413 mdu_array_info_t info; 6417 else if (copy_from_user(&info, argp, sizeof(info))) {
6414 if (!arg) 6418 err = -EFAULT;
6415 memset(&info, 0, sizeof(info)); 6419 goto abort_unlock;
6416 else if (copy_from_user(&info, argp, sizeof(info))) { 6420 }
6417 err = -EFAULT; 6421 if (mddev->pers) {
6418 goto abort_unlock; 6422 err = update_array_info(mddev, &info);
6419 } 6423 if (err) {
6420 if (mddev->pers) { 6424 printk(KERN_WARNING "md: couldn't update"
6421 err = update_array_info(mddev, &info); 6425 " array info. %d\n", err);
6422 if (err) { 6426 goto abort_unlock;
6423 printk(KERN_WARNING "md: couldn't update"
6424 " array info. %d\n", err);
6425 goto abort_unlock;
6426 }
6427 goto done_unlock;
6428 }
6429 if (!list_empty(&mddev->disks)) {
6430 printk(KERN_WARNING
6431 "md: array %s already has disks!\n",
6432 mdname(mddev));
6433 err = -EBUSY;
6434 goto abort_unlock;
6435 }
6436 if (mddev->raid_disks) {
6437 printk(KERN_WARNING
6438 "md: array %s already initialised!\n",
6439 mdname(mddev));
6440 err = -EBUSY;
6441 goto abort_unlock;
6442 }
6443 err = set_array_info(mddev, &info);
6444 if (err) {
6445 printk(KERN_WARNING "md: couldn't set"
6446 " array info. %d\n", err);
6447 goto abort_unlock;
6448 }
6449 } 6427 }
6450 goto done_unlock; 6428 goto done_unlock;
6451 6429 }
6452 default:; 6430 if (!list_empty(&mddev->disks)) {
6431 printk(KERN_WARNING
6432 "md: array %s already has disks!\n",
6433 mdname(mddev));
6434 err = -EBUSY;
6435 goto abort_unlock;
6436 }
6437 if (mddev->raid_disks) {
6438 printk(KERN_WARNING
6439 "md: array %s already initialised!\n",
6440 mdname(mddev));
6441 err = -EBUSY;
6442 goto abort_unlock;
6443 }
6444 err = set_array_info(mddev, &info);
6445 if (err) {
6446 printk(KERN_WARNING "md: couldn't set"
6447 " array info. %d\n", err);
6448 goto abort_unlock;
6449 }
6450 goto done_unlock;
6453 } 6451 }
6454 6452
6455 /* 6453 /*
@@ -6468,52 +6466,51 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6468 /* 6466 /*
6469 * Commands even a read-only array can execute: 6467 * Commands even a read-only array can execute:
6470 */ 6468 */
6471 switch (cmd) 6469 switch (cmd) {
6472 { 6470 case GET_BITMAP_FILE:
6473 case GET_BITMAP_FILE: 6471 err = get_bitmap_file(mddev, argp);
6474 err = get_bitmap_file(mddev, argp); 6472 goto done_unlock;
6475 goto done_unlock;
6476 6473
6477 case RESTART_ARRAY_RW: 6474 case RESTART_ARRAY_RW:
6478 err = restart_array(mddev); 6475 err = restart_array(mddev);
6479 goto done_unlock; 6476 goto done_unlock;
6480 6477
6481 case STOP_ARRAY: 6478 case STOP_ARRAY:
6482 err = do_md_stop(mddev, 0, bdev); 6479 err = do_md_stop(mddev, 0, bdev);
6483 goto done_unlock; 6480 goto done_unlock;
6484 6481
6485 case STOP_ARRAY_RO: 6482 case STOP_ARRAY_RO:
6486 err = md_set_readonly(mddev, bdev); 6483 err = md_set_readonly(mddev, bdev);
6487 goto done_unlock; 6484 goto done_unlock;
6488 6485
6489 case BLKROSET: 6486 case BLKROSET:
6490 if (get_user(ro, (int __user *)(arg))) { 6487 if (get_user(ro, (int __user *)(arg))) {
6491 err = -EFAULT; 6488 err = -EFAULT;
6492 goto done_unlock; 6489 goto done_unlock;
6493 } 6490 }
6494 err = -EINVAL; 6491 err = -EINVAL;
6495 6492
6496 /* if the bdev is going readonly the value of mddev->ro 6493 /* if the bdev is going readonly the value of mddev->ro
6497 * does not matter, no writes are coming 6494 * does not matter, no writes are coming
6498 */ 6495 */
6499 if (ro) 6496 if (ro)
6500 goto done_unlock; 6497 goto done_unlock;
6501 6498
6502 /* are we are already prepared for writes? */ 6499 /* are we are already prepared for writes? */
6503 if (mddev->ro != 1) 6500 if (mddev->ro != 1)
6504 goto done_unlock; 6501 goto done_unlock;
6505 6502
6506 /* transitioning to readauto need only happen for 6503 /* transitioning to readauto need only happen for
6507 * arrays that call md_write_start 6504 * arrays that call md_write_start
6508 */ 6505 */
6509 if (mddev->pers) { 6506 if (mddev->pers) {
6510 err = restart_array(mddev); 6507 err = restart_array(mddev);
6511 if (err == 0) { 6508 if (err == 0) {
6512 mddev->ro = 2; 6509 mddev->ro = 2;
6513 set_disk_ro(mddev->gendisk, 0); 6510 set_disk_ro(mddev->gendisk, 0);
6514 }
6515 } 6511 }
6516 goto done_unlock; 6512 }
6513 goto done_unlock;
6517 } 6514 }
6518 6515
6519 /* 6516 /*
@@ -6535,37 +6532,36 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6535 } 6532 }
6536 } 6533 }
6537 6534
6538 switch (cmd) 6535 switch (cmd) {
6536 case ADD_NEW_DISK:
6539 { 6537 {
6540 case ADD_NEW_DISK: 6538 mdu_disk_info_t info;
6541 { 6539 if (copy_from_user(&info, argp, sizeof(info)))
6542 mdu_disk_info_t info; 6540 err = -EFAULT;
6543 if (copy_from_user(&info, argp, sizeof(info))) 6541 else
6544 err = -EFAULT; 6542 err = add_new_disk(mddev, &info);
6545 else 6543 goto done_unlock;
6546 err = add_new_disk(mddev, &info); 6544 }
6547 goto done_unlock;
6548 }
6549 6545
6550 case HOT_REMOVE_DISK: 6546 case HOT_REMOVE_DISK:
6551 err = hot_remove_disk(mddev, new_decode_dev(arg)); 6547 err = hot_remove_disk(mddev, new_decode_dev(arg));
6552 goto done_unlock; 6548 goto done_unlock;
6553 6549
6554 case HOT_ADD_DISK: 6550 case HOT_ADD_DISK:
6555 err = hot_add_disk(mddev, new_decode_dev(arg)); 6551 err = hot_add_disk(mddev, new_decode_dev(arg));
6556 goto done_unlock; 6552 goto done_unlock;
6557 6553
6558 case RUN_ARRAY: 6554 case RUN_ARRAY:
6559 err = do_md_run(mddev); 6555 err = do_md_run(mddev);
6560 goto done_unlock; 6556 goto done_unlock;
6561 6557
6562 case SET_BITMAP_FILE: 6558 case SET_BITMAP_FILE:
6563 err = set_bitmap_file(mddev, (int)arg); 6559 err = set_bitmap_file(mddev, (int)arg);
6564 goto done_unlock; 6560 goto done_unlock;
6565 6561
6566 default: 6562 default:
6567 err = -EINVAL; 6563 err = -EINVAL;
6568 goto abort_unlock; 6564 goto abort_unlock;
6569 } 6565 }
6570 6566
6571done_unlock: 6567done_unlock:
@@ -7184,6 +7180,7 @@ void md_done_sync(struct mddev *mddev, int blocks, int ok)
7184 wake_up(&mddev->recovery_wait); 7180 wake_up(&mddev->recovery_wait);
7185 if (!ok) { 7181 if (!ok) {
7186 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 7182 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7183 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7187 md_wakeup_thread(mddev->thread); 7184 md_wakeup_thread(mddev->thread);
7188 // stop recovery, signal do_sync .... 7185 // stop recovery, signal do_sync ....
7189 } 7186 }
@@ -7281,6 +7278,7 @@ EXPORT_SYMBOL_GPL(md_allow_write);
7281 7278
7282#define SYNC_MARKS 10 7279#define SYNC_MARKS 10
7283#define SYNC_MARK_STEP (3*HZ) 7280#define SYNC_MARK_STEP (3*HZ)
7281#define UPDATE_FREQUENCY (5*60*HZ)
7284void md_do_sync(struct md_thread *thread) 7282void md_do_sync(struct md_thread *thread)
7285{ 7283{
7286 struct mddev *mddev = thread->mddev; 7284 struct mddev *mddev = thread->mddev;
@@ -7289,6 +7287,7 @@ void md_do_sync(struct md_thread *thread)
7289 window; 7287 window;
7290 sector_t max_sectors,j, io_sectors; 7288 sector_t max_sectors,j, io_sectors;
7291 unsigned long mark[SYNC_MARKS]; 7289 unsigned long mark[SYNC_MARKS];
7290 unsigned long update_time;
7292 sector_t mark_cnt[SYNC_MARKS]; 7291 sector_t mark_cnt[SYNC_MARKS];
7293 int last_mark,m; 7292 int last_mark,m;
7294 struct list_head *tmp; 7293 struct list_head *tmp;
@@ -7448,6 +7447,7 @@ void md_do_sync(struct md_thread *thread)
7448 mddev->curr_resync_completed = j; 7447 mddev->curr_resync_completed = j;
7449 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 7448 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7450 md_new_event(mddev); 7449 md_new_event(mddev);
7450 update_time = jiffies;
7451 7451
7452 blk_start_plug(&plug); 7452 blk_start_plug(&plug);
7453 while (j < max_sectors) { 7453 while (j < max_sectors) {
@@ -7459,6 +7459,7 @@ void md_do_sync(struct md_thread *thread)
7459 ((mddev->curr_resync > mddev->curr_resync_completed && 7459 ((mddev->curr_resync > mddev->curr_resync_completed &&
7460 (mddev->curr_resync - mddev->curr_resync_completed) 7460 (mddev->curr_resync - mddev->curr_resync_completed)
7461 > (max_sectors >> 4)) || 7461 > (max_sectors >> 4)) ||
7462 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7462 (j - mddev->curr_resync_completed)*2 7463 (j - mddev->curr_resync_completed)*2
7463 >= mddev->resync_max - mddev->curr_resync_completed 7464 >= mddev->resync_max - mddev->curr_resync_completed
7464 )) { 7465 )) {
@@ -7466,6 +7467,10 @@ void md_do_sync(struct md_thread *thread)
7466 wait_event(mddev->recovery_wait, 7467 wait_event(mddev->recovery_wait,
7467 atomic_read(&mddev->recovery_active) == 0); 7468 atomic_read(&mddev->recovery_active) == 0);
7468 mddev->curr_resync_completed = j; 7469 mddev->curr_resync_completed = j;
7470 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7471 j > mddev->recovery_cp)
7472 mddev->recovery_cp = j;
7473 update_time = jiffies;
7469 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 7474 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7470 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 7475 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7471 } 7476 }
@@ -7570,8 +7575,13 @@ void md_do_sync(struct md_thread *thread)
7570 printk(KERN_INFO 7575 printk(KERN_INFO
7571 "md: checkpointing %s of %s.\n", 7576 "md: checkpointing %s of %s.\n",
7572 desc, mdname(mddev)); 7577 desc, mdname(mddev));
7573 mddev->recovery_cp = 7578 if (test_bit(MD_RECOVERY_ERROR,
7574 mddev->curr_resync_completed; 7579 &mddev->recovery))
7580 mddev->recovery_cp =
7581 mddev->curr_resync_completed;
7582 else
7583 mddev->recovery_cp =
7584 mddev->curr_resync;
7575 } 7585 }
7576 } else 7586 } else
7577 mddev->recovery_cp = MaxSector; 7587 mddev->recovery_cp = MaxSector;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1e2fc3d9c74..eca59c3074e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -307,6 +307,7 @@ struct mddev {
307 * REQUEST: user-space has requested a sync (used with SYNC) 307 * REQUEST: user-space has requested a sync (used with SYNC)
308 * CHECK: user-space request for check-only, no repair 308 * CHECK: user-space request for check-only, no repair
309 * RESHAPE: A reshape is happening 309 * RESHAPE: A reshape is happening
310 * ERROR: sync-action interrupted because io-error
310 * 311 *
311 * If neither SYNC or RESHAPE are set, then it is a recovery. 312 * If neither SYNC or RESHAPE are set, then it is a recovery.
312 */ 313 */
@@ -320,6 +321,7 @@ struct mddev {
320#define MD_RECOVERY_CHECK 7 321#define MD_RECOVERY_CHECK 7
321#define MD_RECOVERY_RESHAPE 8 322#define MD_RECOVERY_RESHAPE 8
322#define MD_RECOVERY_FROZEN 9 323#define MD_RECOVERY_FROZEN 9
324#define MD_RECOVERY_ERROR 10
323 325
324 unsigned long recovery; 326 unsigned long recovery;
325 /* If a RAID personality determines that recovery (of a particular 327 /* If a RAID personality determines that recovery (of a particular
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8d8555bf3e1..19d77a02663 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,8 @@
53#include <linux/cpu.h> 53#include <linux/cpu.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <linux/ratelimit.h> 55#include <linux/ratelimit.h>
56#include <trace/events/block.h>
57
56#include "md.h" 58#include "md.h"
57#include "raid5.h" 59#include "raid5.h"
58#include "raid0.h" 60#include "raid0.h"
@@ -182,6 +184,8 @@ static void return_io(struct bio *return_bi)
182 return_bi = bi->bi_next; 184 return_bi = bi->bi_next;
183 bi->bi_next = NULL; 185 bi->bi_next = NULL;
184 bi->bi_size = 0; 186 bi->bi_size = 0;
187 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
188 bi, 0);
185 bio_endio(bi, 0); 189 bio_endio(bi, 0);
186 bi = return_bi; 190 bi = return_bi;
187 } 191 }
@@ -670,6 +674,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
670 bi->bi_next = NULL; 674 bi->bi_next = NULL;
671 if (rrdev) 675 if (rrdev)
672 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 676 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
677 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
678 bi, disk_devt(conf->mddev->gendisk),
679 sh->dev[i].sector);
673 generic_make_request(bi); 680 generic_make_request(bi);
674 } 681 }
675 if (rrdev) { 682 if (rrdev) {
@@ -697,6 +704,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
697 rbi->bi_io_vec[0].bv_offset = 0; 704 rbi->bi_io_vec[0].bv_offset = 0;
698 rbi->bi_size = STRIPE_SIZE; 705 rbi->bi_size = STRIPE_SIZE;
699 rbi->bi_next = NULL; 706 rbi->bi_next = NULL;
707 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
708 rbi, disk_devt(conf->mddev->gendisk),
709 sh->dev[i].sector);
700 generic_make_request(rbi); 710 generic_make_request(rbi);
701 } 711 }
702 if (!rdev && !rrdev) { 712 if (!rdev && !rrdev) {
@@ -2853,8 +2863,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2853 pr_debug("for sector %llu, rmw=%d rcw=%d\n", 2863 pr_debug("for sector %llu, rmw=%d rcw=%d\n",
2854 (unsigned long long)sh->sector, rmw, rcw); 2864 (unsigned long long)sh->sector, rmw, rcw);
2855 set_bit(STRIPE_HANDLE, &sh->state); 2865 set_bit(STRIPE_HANDLE, &sh->state);
2856 if (rmw < rcw && rmw > 0) 2866 if (rmw < rcw && rmw > 0) {
2857 /* prefer read-modify-write, but need to get some data */ 2867 /* prefer read-modify-write, but need to get some data */
2868 blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
2869 (unsigned long long)sh->sector, rmw);
2858 for (i = disks; i--; ) { 2870 for (i = disks; i--; ) {
2859 struct r5dev *dev = &sh->dev[i]; 2871 struct r5dev *dev = &sh->dev[i];
2860 if ((dev->towrite || i == sh->pd_idx) && 2872 if ((dev->towrite || i == sh->pd_idx) &&
@@ -2865,7 +2877,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2865 if ( 2877 if (
2866 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 2878 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2867 pr_debug("Read_old block " 2879 pr_debug("Read_old block "
2868 "%d for r-m-w\n", i); 2880 "%d for r-m-w\n", i);
2869 set_bit(R5_LOCKED, &dev->flags); 2881 set_bit(R5_LOCKED, &dev->flags);
2870 set_bit(R5_Wantread, &dev->flags); 2882 set_bit(R5_Wantread, &dev->flags);
2871 s->locked++; 2883 s->locked++;
@@ -2875,8 +2887,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2875 } 2887 }
2876 } 2888 }
2877 } 2889 }
2890 }
2878 if (rcw <= rmw && rcw > 0) { 2891 if (rcw <= rmw && rcw > 0) {
2879 /* want reconstruct write, but need to get some data */ 2892 /* want reconstruct write, but need to get some data */
2893 int qread =0;
2880 rcw = 0; 2894 rcw = 0;
2881 for (i = disks; i--; ) { 2895 for (i = disks; i--; ) {
2882 struct r5dev *dev = &sh->dev[i]; 2896 struct r5dev *dev = &sh->dev[i];
@@ -2895,12 +2909,17 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2895 set_bit(R5_LOCKED, &dev->flags); 2909 set_bit(R5_LOCKED, &dev->flags);
2896 set_bit(R5_Wantread, &dev->flags); 2910 set_bit(R5_Wantread, &dev->flags);
2897 s->locked++; 2911 s->locked++;
2912 qread++;
2898 } else { 2913 } else {
2899 set_bit(STRIPE_DELAYED, &sh->state); 2914 set_bit(STRIPE_DELAYED, &sh->state);
2900 set_bit(STRIPE_HANDLE, &sh->state); 2915 set_bit(STRIPE_HANDLE, &sh->state);
2901 } 2916 }
2902 } 2917 }
2903 } 2918 }
2919 if (rcw)
2920 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
2921 (unsigned long long)sh->sector,
2922 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
2904 } 2923 }
2905 /* now if nothing is locked, and if we have enough data, 2924 /* now if nothing is locked, and if we have enough data,
2906 * we can start a write request 2925 * we can start a write request
@@ -3222,10 +3241,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
3222 3241
3223 } 3242 }
3224 /* done submitting copies, wait for them to complete */ 3243 /* done submitting copies, wait for them to complete */
3225 if (tx) { 3244 async_tx_quiesce(&tx);
3226 async_tx_ack(tx);
3227 dma_wait_for_async_tx(tx);
3228 }
3229} 3245}
3230 3246
3231/* 3247/*
@@ -3901,6 +3917,8 @@ static void raid5_align_endio(struct bio *bi, int error)
3901 rdev_dec_pending(rdev, conf->mddev); 3917 rdev_dec_pending(rdev, conf->mddev);
3902 3918
3903 if (!error && uptodate) { 3919 if (!error && uptodate) {
3920 trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
3921 raid_bi, 0);
3904 bio_endio(raid_bi, 0); 3922 bio_endio(raid_bi, 0);
3905 if (atomic_dec_and_test(&conf->active_aligned_reads)) 3923 if (atomic_dec_and_test(&conf->active_aligned_reads))
3906 wake_up(&conf->wait_for_stripe); 3924 wake_up(&conf->wait_for_stripe);
@@ -4005,6 +4023,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4005 atomic_inc(&conf->active_aligned_reads); 4023 atomic_inc(&conf->active_aligned_reads);
4006 spin_unlock_irq(&conf->device_lock); 4024 spin_unlock_irq(&conf->device_lock);
4007 4025
4026 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
4027 align_bi, disk_devt(mddev->gendisk),
4028 raid_bio->bi_sector);
4008 generic_make_request(align_bi); 4029 generic_make_request(align_bi);
4009 return 1; 4030 return 1;
4010 } else { 4031 } else {
@@ -4079,6 +4100,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4079 struct stripe_head *sh; 4100 struct stripe_head *sh;
4080 struct mddev *mddev = cb->cb.data; 4101 struct mddev *mddev = cb->cb.data;
4081 struct r5conf *conf = mddev->private; 4102 struct r5conf *conf = mddev->private;
4103 int cnt = 0;
4082 4104
4083 if (cb->list.next && !list_empty(&cb->list)) { 4105 if (cb->list.next && !list_empty(&cb->list)) {
4084 spin_lock_irq(&conf->device_lock); 4106 spin_lock_irq(&conf->device_lock);
@@ -4093,9 +4115,11 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4093 smp_mb__before_clear_bit(); 4115 smp_mb__before_clear_bit();
4094 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); 4116 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
4095 __release_stripe(conf, sh); 4117 __release_stripe(conf, sh);
4118 cnt++;
4096 } 4119 }
4097 spin_unlock_irq(&conf->device_lock); 4120 spin_unlock_irq(&conf->device_lock);
4098 } 4121 }
4122 trace_block_unplug(mddev->queue, cnt, !from_schedule);
4099 kfree(cb); 4123 kfree(cb);
4100} 4124}
4101 4125
@@ -4353,6 +4377,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4353 if ( rw == WRITE ) 4377 if ( rw == WRITE )
4354 md_write_end(mddev); 4378 md_write_end(mddev);
4355 4379
4380 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
4381 bi, 0);
4356 bio_endio(bi, 0); 4382 bio_endio(bi, 0);
4357 } 4383 }
4358} 4384}
@@ -4729,8 +4755,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4729 handled++; 4755 handled++;
4730 } 4756 }
4731 remaining = raid5_dec_bi_active_stripes(raid_bio); 4757 remaining = raid5_dec_bi_active_stripes(raid_bio);
4732 if (remaining == 0) 4758 if (remaining == 0) {
4759 trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
4760 raid_bio, 0);
4733 bio_endio(raid_bio, 0); 4761 bio_endio(raid_bio, 0);
4762 }
4734 if (atomic_dec_and_test(&conf->active_aligned_reads)) 4763 if (atomic_dec_and_test(&conf->active_aligned_reads))
4735 wake_up(&conf->wait_for_stripe); 4764 wake_up(&conf->wait_for_stripe);
4736 return handled; 4765 return handled;
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 640c69ceec9..8dfaa2ce2e9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1;
98extern const struct raid6_calls raid6_altivec2; 98extern const struct raid6_calls raid6_altivec2;
99extern const struct raid6_calls raid6_altivec4; 99extern const struct raid6_calls raid6_altivec4;
100extern const struct raid6_calls raid6_altivec8; 100extern const struct raid6_calls raid6_altivec8;
101extern const struct raid6_calls raid6_avx2x1;
102extern const struct raid6_calls raid6_avx2x2;
103extern const struct raid6_calls raid6_avx2x4;
101 104
102struct raid6_recov_calls { 105struct raid6_recov_calls {
103 void (*data2)(int, size_t, int, int, void **); 106 void (*data2)(int, size_t, int, int, void **);
@@ -109,6 +112,7 @@ struct raid6_recov_calls {
109 112
110extern const struct raid6_recov_calls raid6_recov_intx1; 113extern const struct raid6_recov_calls raid6_recov_intx1;
111extern const struct raid6_recov_calls raid6_recov_ssse3; 114extern const struct raid6_recov_calls raid6_recov_ssse3;
115extern const struct raid6_recov_calls raid6_recov_avx2;
112 116
113/* Algorithm list */ 117/* Algorithm list */
114extern const struct raid6_calls * const raid6_algos[]; 118extern const struct raid6_calls * const raid6_algos[];
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe165b..9f7c184725d 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,8 +1,11 @@
1obj-$(CONFIG_RAID6_PQ) += raid6_pq.o 1obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
2 2
3raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ 3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ 4 int8.o int16.o int32.o
5 altivec8.o mmx.o sse1.o sse2.o 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8
6hostprogs-y += mktables 9hostprogs-y += mktables
7 10
8quiet_cmd_unroll = UNROLL $@ 11quiet_cmd_unroll = UNROLL $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f50ad2..6d7316fe9f3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = {
45 &raid6_sse1x2, 45 &raid6_sse1x2,
46 &raid6_sse2x1, 46 &raid6_sse2x1,
47 &raid6_sse2x2, 47 &raid6_sse2x2,
48#ifdef CONFIG_AS_AVX2
49 &raid6_avx2x1,
50 &raid6_avx2x2,
51#endif
48#endif 52#endif
49#if defined(__x86_64__) && !defined(__arch_um__) 53#if defined(__x86_64__) && !defined(__arch_um__)
50 &raid6_sse2x1, 54 &raid6_sse2x1,
51 &raid6_sse2x2, 55 &raid6_sse2x2,
52 &raid6_sse2x4, 56 &raid6_sse2x4,
57#ifdef CONFIG_AS_AVX2
58 &raid6_avx2x1,
59 &raid6_avx2x2,
60 &raid6_avx2x4,
61#endif
53#endif 62#endif
54#ifdef CONFIG_ALTIVEC 63#ifdef CONFIG_ALTIVEC
55 &raid6_altivec1, 64 &raid6_altivec1,
@@ -72,6 +81,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov);
72 81
73const struct raid6_recov_calls *const raid6_recov_algos[] = { 82const struct raid6_recov_calls *const raid6_recov_algos[] = {
74#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) 83#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
84#ifdef CONFIG_AS_AVX2
85 &raid6_recov_avx2,
86#endif
75 &raid6_recov_ssse3, 87 &raid6_recov_ssse3,
76#endif 88#endif
77 &raid6_recov_intx1, 89 &raid6_recov_intx1,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index b71012b756f..7cc12b532e9 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,13 +24,10 @@
24 24
25#include <linux/raid/pq.h> 25#include <linux/raid/pq.h>
26 26
27#ifdef CONFIG_ALTIVEC
28
29#include <altivec.h> 27#include <altivec.h>
30#ifdef __KERNEL__ 28#ifdef __KERNEL__
31# include <asm/cputable.h> 29# include <asm/cputable.h>
32# include <asm/switch_to.h> 30# include <asm/switch_to.h>
33#endif
34 31
35/* 32/*
36 * This is the C data type to use. We use a vector of 33 * This is the C data type to use. We use a vector of
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 00000000000..bc3b1dd436e
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 2012 Intel Corporation
4 * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
5 *
6 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
7 *
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
12 * Boston MA 02111-1307, USA; either version 2 of the License, or
13 * (at your option) any later version; incorporated herein by reference.
14 *
15 * ----------------------------------------------------------------------- */
16
17/*
18 * AVX2 implementation of RAID-6 syndrome functions
19 *
20 */
21
22#ifdef CONFIG_AS_AVX2
23
24#include <linux/raid/pq.h>
25#include "x86.h"
26
27static const struct raid6_avx2_constants {
28 u64 x1d[4];
29} raid6_avx2_constants __aligned(32) = {
30 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
31 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
32};
33
34static int raid6_have_avx2(void)
35{
36 return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
37}
38
39/*
40 * Plain AVX2 implementation
41 */
42static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
43{
44 u8 **dptr = (u8 **)ptrs;
45 u8 *p, *q;
46 int d, z, z0;
47
48 z0 = disks - 3; /* Highest data disk */
49 p = dptr[z0+1]; /* XOR parity */
50 q = dptr[z0+2]; /* RS syndrome */
51
52 kernel_fpu_begin();
53
54 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
55 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */
56
57 for (d = 0; d < bytes; d += 32) {
58 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
59 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
60 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
61 asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
62 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
63 for (z = z0-2; z >= 0; z--) {
64 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
65 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
66 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
67 asm volatile("vpand %ymm0,%ymm5,%ymm5");
68 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
69 asm volatile("vpxor %ymm6,%ymm2,%ymm2");
70 asm volatile("vpxor %ymm6,%ymm4,%ymm4");
71 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
72 }
73 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
74 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
75 asm volatile("vpand %ymm0,%ymm5,%ymm5");
76 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
77 asm volatile("vpxor %ymm6,%ymm2,%ymm2");
78 asm volatile("vpxor %ymm6,%ymm4,%ymm4");
79
80 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
81 asm volatile("vpxor %ymm2,%ymm2,%ymm2");
82 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
83 asm volatile("vpxor %ymm4,%ymm4,%ymm4");
84 }
85
86 asm volatile("sfence" : : : "memory");
87 kernel_fpu_end();
88}
89
/*
 * Descriptor for the plain (not unrolled) AVX2 syndrome generator.
 * Entries, in order: generator function, availability check, name,
 * cache-hint flag.
 */
90const struct raid6_calls raid6_avx2x1 = {
91 raid6_avx21_gen_syndrome,
92 raid6_have_avx2,
93 "avx2x1",
94 1 /* Has cache hints */
95};
96
97/*
98 * Unrolled-by-2 AVX2 implementation
99 */
100static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
101{
102 u8 **dptr = (u8 **)ptrs;
103 u8 *p, *q;
104 int d, z, z0;
105
106 z0 = disks - 3; /* Highest data disk */
107 p = dptr[z0+1]; /* XOR parity */
108 q = dptr[z0+2]; /* RS syndrome */
109
110 kernel_fpu_begin();
111
112 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
113 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
114
115 /* We uniformly assume a single prefetch covers at least 32 bytes */
116 for (d = 0; d < bytes; d += 64) {
117 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
118 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
119 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
120 asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
121 asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
122 asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
123 for (z = z0-1; z >= 0; z--) {
124 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
125 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
126 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
127 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
128 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
129 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
130 asm volatile("vpand %ymm0,%ymm5,%ymm5");
131 asm volatile("vpand %ymm0,%ymm7,%ymm7");
132 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
133 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
134 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
135 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
136 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
137 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
138 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
139 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
140 }
141 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
142 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
143 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
144 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
145 }
146
147 asm volatile("sfence" : : : "memory");
148 kernel_fpu_end();
149}
150
/*
 * Descriptor for the unrolled-by-2 AVX2 syndrome generator.
 * Entries, in order: generator function, availability check, name,
 * cache-hint flag.
 */
151const struct raid6_calls raid6_avx2x2 = {
152 raid6_avx22_gen_syndrome,
153 raid6_have_avx2,
154 "avx2x2",
155 1 /* Has cache hints */
156};
157
158#ifdef CONFIG_X86_64
159
160/*
161 * Unrolled-by-4 AVX2 implementation
162 */
163static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
164{
165 u8 **dptr = (u8 **)ptrs;
166 u8 *p, *q;
167 int d, z, z0;
168
169 z0 = disks - 3; /* Highest data disk */
170 p = dptr[z0+1]; /* XOR parity */
171 q = dptr[z0+2]; /* RS syndrome */
172
173 kernel_fpu_begin();
174
175 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
176 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
177 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */
178 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */
179 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */
180 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */
181 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */
182 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */
183 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */
184 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */
185
186 for (d = 0; d < bytes; d += 128) {
187 for (z = z0; z >= 0; z--) {
188 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
189 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
190 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
191 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
192 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
193 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
194 asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
195 asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
196 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
197 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
198 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
199 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
200 asm volatile("vpand %ymm0,%ymm5,%ymm5");
201 asm volatile("vpand %ymm0,%ymm7,%ymm7");
202 asm volatile("vpand %ymm0,%ymm13,%ymm13");
203 asm volatile("vpand %ymm0,%ymm15,%ymm15");
204 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
205 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
206 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
207 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
208 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
209 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
210 asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
211 asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
212 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
213 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
214 asm volatile("vpxor %ymm13,%ymm10,%ymm10");
215 asm volatile("vpxor %ymm15,%ymm11,%ymm11");
216 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
217 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
218 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
219 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
220 }
221 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
222 asm volatile("vpxor %ymm2,%ymm2,%ymm2");
223 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
224 asm volatile("vpxor %ymm3,%ymm3,%ymm3");
225 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
226 asm volatile("vpxor %ymm10,%ymm10,%ymm10");
227 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
228 asm volatile("vpxor %ymm11,%ymm11,%ymm11");
229 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
230 asm volatile("vpxor %ymm4,%ymm4,%ymm4");
231 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
232 asm volatile("vpxor %ymm6,%ymm6,%ymm6");
233 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
234 asm volatile("vpxor %ymm12,%ymm12,%ymm12");
235 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
236 asm volatile("vpxor %ymm14,%ymm14,%ymm14");
237 }
238
239 asm volatile("sfence" : : : "memory");
240 kernel_fpu_end();
241}
242
/*
 * Descriptor for the unrolled-by-4 AVX2 syndrome generator (x86-64 only).
 * Entries, in order: generator function, availability check, name,
 * cache-hint flag.
 */
243const struct raid6_calls raid6_avx2x4 = {
244 raid6_avx24_gen_syndrome,
245 raid6_have_avx2,
246 "avx2x4",
247 1 /* Has cache hints */
248};
249#endif
250
251#endif /* CONFIG_AS_AVX2 */
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 279347f2309..590c71c9e20 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -16,7 +16,7 @@
16 * MMX implementation of RAID-6 syndrome functions 16 * MMX implementation of RAID-6 syndrome functions
17 */ 17 */
18 18
19#if defined(__i386__) && !defined(__arch_um__) 19#ifdef CONFIG_X86_32
20 20
21#include <linux/raid/pq.h> 21#include <linux/raid/pq.h>
22#include "x86.h" 22#include "x86.h"
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 00000000000..e1eea433a49
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,323 @@
1/*
2 * Copyright (C) 2012 Intel Corporation
3 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
9 */
10
11#if CONFIG_AS_AVX2
12
13#include <linux/raid/pq.h>
14#include "x86.h"
15
16static int raid6_has_avx2(void)
17{
18 return boot_cpu_has(X86_FEATURE_AVX2) &&
19 boot_cpu_has(X86_FEATURE_AVX);
20}
21
/*
 * Recover two failed data blocks, ptrs[faila] and ptrs[failb], using AVX2.
 *
 * Step 1: regenerate P and Q with the two failed blocks replaced by the
 * zero page, writing the results into the failed blocks' own buffers
 * (dp and dq).  The pointer table is restored afterwards.
 * Step 2: XOR those with the real P and Q, then push each byte through
 * the qmul / pbmul tables with two 16-entry vpshufb lookups per table,
 * one for the low nibble and one for the high nibble.  The result for
 * failb is stored to dq, and that result XORed with (dp ^ p) is stored
 * to dp as the data for faila.
 *
 * The x86-64 path handles 64 bytes per pass (it needs ymm8-ymm15), the
 * other path 32 bytes.
 *
 * NOTE(review): the loop ends only when 'bytes' reaches exactly 0, so
 * 'bytes' is presumably a multiple of the per-pass stride, and the
 * raid6_gfexi[failb-faila] index presumably assumes faila < failb --
 * confirm against callers.  vmovdqa requires 32-byte-aligned buffers.
 */
22static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
23 int failb, void **ptrs)
24{
25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */
27 const u8 *qmul; /* Q multiplier table (for both) */
28 const u8 x0f = 0x0f;
29
30 p = (u8 *)ptrs[disks-2];
31 q = (u8 *)ptrs[disks-1];
32
33 /* Compute syndrome with zero for the missing data pages
34 Use the dead data pages as temporary storage for
35 delta p and delta q */
36 dp = (u8 *)ptrs[faila];
37 ptrs[faila] = (void *)raid6_empty_zero_page;
38 ptrs[disks-2] = dp;
39 dq = (u8 *)ptrs[failb];
40 ptrs[failb] = (void *)raid6_empty_zero_page;
41 ptrs[disks-1] = dq;
42
43 raid6_call.gen_syndrome(disks, bytes, ptrs);
44
45 /* Restore pointer table */
46 ptrs[faila] = dp;
47 ptrs[failb] = dq;
48 ptrs[disks-2] = p;
49 ptrs[disks-1] = q;
50
51 /* Now, pick the proper data tables */
52 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
53 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
54 raid6_gfexp[failb]]];
55
56 kernel_fpu_begin();
57
58 /* ymm7 = x0f[32]: the 0x0f nibble mask broadcast to every byte */
59 asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
60
61 while (bytes) {
62#ifdef CONFIG_X86_64
63 asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
64 asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
65 asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
66 asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
67 asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
68 asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
69 asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
70 asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
71
72 /*
73 * 1 = dq[0] ^ q[0]
74 * 9 = dq[32] ^ q[32]
75 * 0 = dp[0] ^ p[0]
76 * 8 = dp[32] ^ p[32]
77 */
78
79 asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
80 asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
81
82 asm volatile("vpsraw $4, %ymm1, %ymm3");
83 asm volatile("vpsraw $4, %ymm9, %ymm12");
84 asm volatile("vpand %ymm7, %ymm1, %ymm1");
85 asm volatile("vpand %ymm7, %ymm9, %ymm9");
86 asm volatile("vpand %ymm7, %ymm3, %ymm3");
87 asm volatile("vpand %ymm7, %ymm12, %ymm12");
88 asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
89 asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
90 asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
91 asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
92 asm volatile("vpxor %ymm14, %ymm15, %ymm15");
93 asm volatile("vpxor %ymm4, %ymm5, %ymm5");
94
95 /*
96 * 5 = qx[0]
97 * 15 = qx[32]
98 */
99
100 asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
101 asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
102 asm volatile("vpsraw $4, %ymm0, %ymm2");
103 asm volatile("vpsraw $4, %ymm8, %ymm6");
104 asm volatile("vpand %ymm7, %ymm0, %ymm3");
105 asm volatile("vpand %ymm7, %ymm8, %ymm14");
106 asm volatile("vpand %ymm7, %ymm2, %ymm2");
107 asm volatile("vpand %ymm7, %ymm6, %ymm6");
108 asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
109 asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
110 asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
111 asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
112 asm volatile("vpxor %ymm4, %ymm1, %ymm1");
113 asm volatile("vpxor %ymm12, %ymm13, %ymm13");
114
115 /*
116 * 1 = pbmul[px[0]]
117 * 13 = pbmul[px[32]]
118 */
119 asm volatile("vpxor %ymm5, %ymm1, %ymm1");
120 asm volatile("vpxor %ymm15, %ymm13, %ymm13");
121
122 /*
123 * 1 = db[0] = DQ[0]
124 * 13 = db[32] = DQ[32]
125 */
126 asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
127 asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
128 asm volatile("vpxor %ymm1, %ymm0, %ymm0");
129 asm volatile("vpxor %ymm13, %ymm8, %ymm8");
130
131 asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
132 asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
133
134 bytes -= 64;
135 p += 64;
136 q += 64;
137 dp += 64;
138 dq += 64;
139#else
140 asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
141 asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
142 asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
143 asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
144
145 /* 1 = dq ^ q; 0 = dp ^ p */
146
147 asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
148 asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
149
150 /*
151 * 1 = dq ^ q
152 * 3 = (dq ^ q) >> 4
153 */
154 asm volatile("vpsraw $4, %ymm1, %ymm3");
155 asm volatile("vpand %ymm7, %ymm1, %ymm1");
156 asm volatile("vpand %ymm7, %ymm3, %ymm3");
157 asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
158 asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
159 asm volatile("vpxor %ymm4, %ymm5, %ymm5");
160
161 /* 5 = qx */
162
163 asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
164 asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
165
166 asm volatile("vpsraw $4, %ymm0, %ymm2");
167 asm volatile("vpand %ymm7, %ymm0, %ymm3");
168 asm volatile("vpand %ymm7, %ymm2, %ymm2");
169 asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
170 asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
171 asm volatile("vpxor %ymm4, %ymm1, %ymm1");
172
173 /* 1 = pbmul[px] */
174 asm volatile("vpxor %ymm5, %ymm1, %ymm1");
175 /* 1 = db = DQ */
176 asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
177
178 asm volatile("vpxor %ymm1, %ymm0, %ymm0");
179 asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
180
181 bytes -= 32;
182 p += 32;
183 q += 32;
184 dp += 32;
185 dq += 32;
186#endif
187 }
188
189 kernel_fpu_end();
190}
191
/*
 * Recover one failed data block, ptrs[faila], and rewrite the P block,
 * using AVX2.
 *
 * Step 1: regenerate the syndrome with the failed block replaced by the
 * zero page; the Q result lands in the failed block's own buffer (dq).
 * The pointer table is restored afterwards.
 * Step 2: XOR that with the real Q and push each byte through the qmul
 * table with two 16-entry vpshufb lookups, one for the low nibble and
 * one for the high nibble.  The result is stored to dq, and the same
 * value XORed into P is stored back to p.
 *
 * The x86-64 path handles 64 bytes per pass (it needs ymm8-ymm14), the
 * other path 32 bytes.
 *
 * NOTE(review): the loop ends only when 'bytes' reaches exactly 0, so
 * 'bytes' is presumably a multiple of the per-pass stride -- confirm
 * against callers.  vmovdqa requires 32-byte-aligned buffers.
 */
192static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
193 void **ptrs)
194{
195 u8 *p, *q, *dq;
196 const u8 *qmul; /* Q multiplier table */
197 const u8 x0f = 0x0f;
198
199 p = (u8 *)ptrs[disks-2];
200 q = (u8 *)ptrs[disks-1];
201
202 /* Compute syndrome with zero for the missing data page
203 Use the dead data page as temporary storage for delta q */
204 dq = (u8 *)ptrs[faila];
205 ptrs[faila] = (void *)raid6_empty_zero_page;
206 ptrs[disks-1] = dq;
207
208 raid6_call.gen_syndrome(disks, bytes, ptrs);
209
210 /* Restore pointer table */
211 ptrs[faila] = dq;
212 ptrs[disks-1] = q;
213
214 /* Now, pick the proper data tables */
215 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
216
217 kernel_fpu_begin();
218
219 asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
220
221 while (bytes) {
222#ifdef CONFIG_X86_64
223 asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
224 asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
225 asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
226 asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
227
228 /*
229 * 3 = q[0] ^ dq[0]
230 * 8 = q[32] ^ dq[32]
231 */
232 asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
233 asm volatile("vmovapd %ymm0, %ymm13");
234 asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
235 asm volatile("vmovapd %ymm1, %ymm14");
236
237 asm volatile("vpsraw $4, %ymm3, %ymm6");
238 asm volatile("vpsraw $4, %ymm8, %ymm12");
239 asm volatile("vpand %ymm7, %ymm3, %ymm3");
240 asm volatile("vpand %ymm7, %ymm8, %ymm8");
241 asm volatile("vpand %ymm7, %ymm6, %ymm6");
242 asm volatile("vpand %ymm7, %ymm12, %ymm12");
243 asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
244 asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
245 asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
246 asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
247 asm volatile("vpxor %ymm0, %ymm1, %ymm1");
248 asm volatile("vpxor %ymm13, %ymm14, %ymm14");
249
250 /*
251 * 1 = qmul[q[0] ^ dq[0]]
252 * 14 = qmul[q[32] ^ dq[32]]
253 */
254 asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
255 asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
256 asm volatile("vpxor %ymm1, %ymm2, %ymm2");
257 asm volatile("vpxor %ymm14, %ymm12, %ymm12");
258
259 /*
260 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
261 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
262 */
263
264 asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
265 asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
266 asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
267 asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
268
269 bytes -= 64;
270 p += 64;
271 q += 64;
272 dq += 64;
273#else
274 asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
275 asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
276
277 /* 3 = q ^ dq */
278
279 asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
280 asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
281
282 asm volatile("vpsraw $4, %ymm3, %ymm6");
283 asm volatile("vpand %ymm7, %ymm3, %ymm3");
284 asm volatile("vpand %ymm7, %ymm6, %ymm6");
285 asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
286 asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
287 asm volatile("vpxor %ymm0, %ymm1, %ymm1");
288
289 /* 1 = qmul[q ^ dq] */
290
291 asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
292 asm volatile("vpxor %ymm1, %ymm2, %ymm2");
293
294 /* 2 = p ^ qmul[q ^ dq] */
295
296 asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
297 asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
298
299 bytes -= 32;
300 p += 32;
301 q += 32;
302 dq += 32;
303#endif
304 }
305
306 kernel_fpu_end();
307}
308
309const struct raid6_recov_calls raid6_recov_avx2 = {
310 .data2 = raid6_2data_recov_avx2,
311 .datap = raid6_datap_recov_avx2,
312 .valid = raid6_has_avx2,
313#ifdef CONFIG_X86_64
314 .name = "avx2x2",
315#else
316 .name = "avx2x1",
317#endif
318 .priority = 2,
319};
320
321#else
322#warning "your version of binutils lacks AVX2 support"
323#endif
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c
index ecb710c0b4d..a9168328f03 100644
--- a/lib/raid6/recov_ssse3.c
+++ b/lib/raid6/recov_ssse3.c
@@ -7,8 +7,6 @@
7 * of the License. 7 * of the License.
8 */ 8 */
9 9
10#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
11
12#include <linux/raid/pq.h> 10#include <linux/raid/pq.h>
13#include "x86.h" 11#include "x86.h"
14 12
@@ -332,5 +330,3 @@ const struct raid6_recov_calls raid6_recov_ssse3 = {
332#endif 330#endif
333 .priority = 1, 331 .priority = 1,
334}; 332};
335
336#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index 10dd91948c0..f7629713944 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -21,7 +21,7 @@
21 * worthwhile as a separate implementation. 21 * worthwhile as a separate implementation.
22 */ 22 */
23 23
24#if defined(__i386__) && !defined(__arch_um__) 24#ifdef CONFIG_X86_32
25 25
26#include <linux/raid/pq.h> 26#include <linux/raid/pq.h>
27#include "x86.h" 27#include "x86.h"
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index bc2d57daa58..85b82c85f28 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -17,8 +17,6 @@
17 * 17 *
18 */ 18 */
19 19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21
22#include <linux/raid/pq.h> 20#include <linux/raid/pq.h>
23#include "x86.h" 21#include "x86.h"
24 22
@@ -159,9 +157,7 @@ const struct raid6_calls raid6_sse2x2 = {
159 1 /* Has cache hints */ 157 1 /* Has cache hints */
160}; 158};
161 159
162#endif 160#ifdef CONFIG_X86_64
163
164#if defined(__x86_64__) && !defined(__arch_um__)
165 161
166/* 162/*
167 * Unrolled-by-4 SSE2 implementation 163 * Unrolled-by-4 SSE2 implementation
@@ -259,4 +255,4 @@ const struct raid6_calls raid6_sse2x4 = {
259 1 /* Has cache hints */ 255 1 /* Has cache hints */
260}; 256};
261 257
262#endif 258#endif /* CONFIG_X86_64 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d9476..087332dbf8a 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -10,6 +10,31 @@ LD = ld
10AWK = awk -f 10AWK = awk -f
11AR = ar 11AR = ar
12RANLIB = ranlib 12RANLIB = ranlib
# Objects that are built on every architecture.
OBJS	= int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o

# Normalise the host machine name so that i486/i586/i686 all become i386.
# The sed expression must start with the 's' command itself; a leading '/'
# turns it into an address and leaves the name unchanged.
ARCH := $(shell uname -m 2>/dev/null | sed -e s/i.86/i386/)
ifeq ($(ARCH),i386)
        CFLAGS += -DCONFIG_X86_32
        IS_X86 = yes
endif
ifeq ($(ARCH),x86_64)
        CFLAGS += -DCONFIG_X86_64
        IS_X86 = yes
endif
24
# Feature probes.  $(shell ...) runs under /bin/sh, so only POSIX
# redirections (">/dev/null 2>&1") and printf are used here; ">&file"
# and "echo -e" are bash extensions.
ifeq ($(IS_X86),yes)
        OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
        # Define CONFIG_AS_AVX2 only if the assembler accepts an AVX2 instruction.
        CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" |	\
                    gcc -c -x assembler - >/dev/null 2>&1 &&	\
                    rm ./-.o && echo -DCONFIG_AS_AVX2=1)
else
        # Build the AltiVec objects only if the compiler can include <altivec.h>.
        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
                         gcc -c -x c - >/dev/null 2>&1 && \
                         rm ./-.o && echo yes)
        ifeq ($(HAS_ALTIVEC),yes)
                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
        endif
endif
13 38
14.c.o: 39.c.o:
15 $(CC) $(CFLAGS) -c -o $@ $< 40 $(CC) $(CFLAGS) -c -o $@ $<
@@ -22,9 +47,7 @@ RANLIB = ranlib
22 47
23all: raid6.a raid6test 48all: raid6.a raid6test
24 49
25raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ 50raid6.a: $(OBJS)
26 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
27 tables.o
28 rm -f $@ 51 rm -f $@
29 $(AR) cq $@ $^ 52 $(AR) cq $@ $^
30 $(RANLIB) $@ 53 $(RANLIB) $@
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c5..b7595484a81 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
45#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 45#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
48#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 49#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
49 50
50/* Should work well enough on modern CPUs for testing */ 51/* Should work well enough on modern CPUs for testing */
51static inline int boot_cpu_has(int flag) 52static inline int boot_cpu_has(int flag)
52{ 53{
53 u32 eax = (flag & 0x20) ? 0x80000001 : 1; 54 u32 eax, ebx, ecx, edx;
54 u32 ecx, edx; 55
56 eax = (flag & 0x100) ? 7 :
57 (flag & 0x20) ? 0x80000001 : 1;
58 ecx = 0;
55 59
56 asm volatile("cpuid" 60 asm volatile("cpuid"
57 : "+a" (eax), "=d" (edx), "=c" (ecx) 61 : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
58 : : "ebx");
59 62
60 return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; 63 return ((flag & 0x100 ? ebx :
64 (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
61} 65}
62 66
63#endif /* ndef __KERNEL__ */ 67#endif /* ndef __KERNEL__ */