From c71a896154119f4ca9e89d6078f5f63ad60ef199 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 Jan 2009 12:06:27 -0200 Subject: blktrace: add ftrace plugin Impact: New way of using the blktrace infrastructure This drops the requirement of userspace utilities to use the blktrace facility. Configuration is done through sysfs, adding a "trace" directory to the partition directory where blktrace can be enabled for the associated request_queue. The same filters present in the IOCTL interface are present as sysfs device attributes. The /sys/block/sdX/sdXN/trace/enable file allows tracing without any filters. The other files in this directory: pid, act_mask, start_lba and end_lba can be used with the same meaning as with the IOCTL interface. Using the sysfs interface will only set up the request_queue->blk_trace fields, tracing will only take place when the "blk" tracer is selected via the ftrace interface, as in the following example: To see the trace, one can use the /d/tracing/trace file or the /d/tracing/trace_pipe file, with semantics defined in the ftrace documentation in Documentation/ftrace.txt. [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3046.491224: 8,1 A WBS 6367 + 8 <- (8,1) 6304 kjournald-305 [000] 3046.491227: 8,1 Q R 6367 + 8 [kjournald] kjournald-305 [000] 3046.491236: 8,1 G RB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491239: 8,1 P NS [kjournald] kjournald-305 [000] 3046.491242: 8,1 I RBS 6367 + 8 [kjournald] kjournald-305 [000] 3046.491251: 8,1 D WB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491610: 8,1 U WS [kjournald] 1 -0 [000] 3046.511914: 8,1 C RS 6367 + 8 [6367] [root@f10-1 ~]# The default line context (prefix) format is the one described in the ftrace documentation, with the blktrace specific bits using its existing format, described in blkparse(8). 
If one wants to have the classic blktrace formatting, this is possible by using: [root@f10-1 ~]# echo blk_classic > /t/trace_options [root@f10-1 ~]# cat /t/trace 8,1 0 3046.491224 305 A WBS 6367 + 8 <- (8,1) 6304 8,1 0 3046.491227 305 Q R 6367 + 8 [kjournald] 8,1 0 3046.491236 305 G RB 6367 + 8 [kjournald] 8,1 0 3046.491239 305 P NS [kjournald] 8,1 0 3046.491242 305 I RBS 6367 + 8 [kjournald] 8,1 0 3046.491251 305 D WB 6367 + 8 [kjournald] 8,1 0 3046.491610 305 U WS [kjournald] 1 8,1 0 3046.511914 0 C RS 6367 + 8 [6367] [root@f10-1 ~]# Using the ftrace standard format allows more flexibility, such as the ability to ask for backtraces via trace_options: [root@f10-1 ~]# echo noblk_classic > /t/trace_options [root@f10-1 ~]# echo stacktrace > /t/trace_options [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3318.826779: 8,1 A WBS 6375 + 8 <- (8,1) 6312 kjournald-305 [000] 3318.826782: <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread <= child_rip kjournald-305 [000] 3318.826836: 8,1 Q R 6375 + 8 [kjournald] kjournald-305 [000] 3318.826837: <= generic_make_request <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread Please read the ftrace documentation to use additional, standardized tracing filters such as /d/tracing/trace_cpumask, etc. See also /d/tracing/trace_mark to add comments in the trace stream, which is equivalent to the /d/block/sdaN/msg interface. 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- fs/partitions/check.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/partitions') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6d720243f5f4..01714efdc65a 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -268,6 +268,10 @@ ssize_t part_fail_store(struct device *dev, } #endif +#ifdef CONFIG_BLK_DEV_IO_TRACE +extern struct attribute_group blk_trace_attr_group; +#endif + static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); @@ -294,6 +298,9 @@ static struct attribute_group part_attr_group = { static struct attribute_group *part_attr_groups[] = { &part_attr_group, +#ifdef CONFIG_BLK_DEV_IO_TRACE + &blk_trace_attr_group, +#endif NULL }; -- cgit v1.2.2 From 157f9c00e88529ed84bd7d581a40d411e5414cf0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Jan 2009 15:00:56 -0200 Subject: tracing/blktrace: fix up checkpatch reported problems in ftrace plugin patch Also make sure sparse (make C=2 block/blktrace.o) is happy too. 
Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- fs/partitions/check.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/partitions') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 01714efdc65a..8a17f7edcc74 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "check.h" @@ -268,10 +269,6 @@ ssize_t part_fail_store(struct device *dev, } #endif -#ifdef CONFIG_BLK_DEV_IO_TRACE -extern struct attribute_group blk_trace_attr_group; -#endif - static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); -- cgit v1.2.2 From f67f129e519fa87f8ebd236b6336fe43f31ee141 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 1 Mar 2009 21:10:49 +0800 Subject: Driver core: implement uevent suppress in kobject This patch implements uevent suppress in kobject and removes it from struct device, based on the following ideas: 1,Uevent sending should be one attribute of kobject, so suppressing it in kobject layer is more natural than in device layer. By this way, we can do it for other objects embedded with kobject. 2,It may save several bytes for each instance of struct device.(On my omap3(32bit ARM) based box, can save 8bytes per device object) This patch also introduces dev_set|get_uevent_suppress() helpers to set and query uevent_suppress attribute in case to help kobject as private part of struct device in future. [This version is against the latest driver-core patch set of Greg,please ignore the last version.] 
Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- fs/partitions/check.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/partitions') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6d720243f5f4..38e337d51ced 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - pdev->uevent_suppress = 1; + dev_set_uevent_suppress(pdev, 1); err = device_add(pdev); if (err) goto out_put; @@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!p->holder_dir) goto out_del; - pdev->uevent_suppress = 0; + dev_set_uevent_suppress(pdev, 0); if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(pdev, &dev_attr_whole_disk); if (err) @@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ - if (!ddev->uevent_suppress) + if (!dev_get_uevent_suppress(pdev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); return p; @@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk) dev_set_name(ddev, disk->disk_name); /* delay uevents, until we scanned partition table */ - ddev->uevent_suppress = 1; + dev_set_uevent_suppress(ddev, 1); if (device_add(ddev)) return; @@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - ddev->uevent_suppress = 0; + dev_set_uevent_suppress(ddev, 0); kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ -- cgit v1.2.2 From b44b0ab3bac16356f03e94b1b49ba9305710c445 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Thu, 26 Mar 2009 15:23:47 +0100 Subject: [S390] dasd: add large volume support The dasd device driver will now support ECKD devices with more than 65520 cylinders. 
In the traditional ECKD addressing scheme each track is addressed by a 16-bit cylinder and 16-bit head number. The new addressing scheme makes use of the fact that the actual number of heads is never larger than 15, so 12 bits of the head number can be redefined to be part of the cylinder address. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- fs/partitions/ibm.c | 101 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 30 deletions(-) (limited to 'fs/partitions') diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 1e064c4a4f86..46297683cd34 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -21,20 +21,38 @@ * compute the block number from a * cyl-cyl-head-head structure */ -static inline int +static sector_t cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { - return ptr->cc * geo->heads * geo->sectors + - ptr->hh * geo->sectors; + + sector_t cyl; + __u16 head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head = ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors; } /* * compute the block number from a * cyl-cyl-head-head-block structure */ -static inline int +static sector_t cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { - return ptr->cc * geo->heads * geo->sectors + - ptr->hh * geo->sectors + + + sector_t cyl; + __u16 head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head = ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors + ptr->b; } @@ -43,14 +61,15 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { int ibm_partition(struct parsed_partitions *state, struct block_device *bdev) { - int blocksize, offset, size,res; - loff_t i_size; + int blocksize, res; + loff_t i_size, offset, size, fmt_size; dasd_information2_t *info; struct hd_geometry *geo; char 
type[5] = {0,}; char name[7] = {0,}; union label_t { - struct vtoc_volume_label vol; + struct vtoc_volume_label_cdl vol; + struct vtoc_volume_label_ldl lnx; struct vtoc_cms_label cms; } *label; unsigned char *data; @@ -85,14 +104,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) if (data == NULL) goto out_readerr; - strncpy (type, data, 4); - if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) - strncpy(name, data + 8, 6); - else - strncpy(name, data + 4, 6); memcpy(label, data, sizeof(union label_t)); put_dev_sector(sect); + if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) { + strncpy(type, label->vol.vollbl, 4); + strncpy(name, label->vol.volid, 6); + } else { + strncpy(type, label->lnx.vollbl, 4); + strncpy(name, label->lnx.volid, 6); + } EBCASC(type, 4); EBCASC(name, 6); @@ -110,36 +131,54 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* * VM style CMS1 labeled disk */ + blocksize = label->cms.block_size; if (label->cms.disk_offset != 0) { printk("CMS1/%8s(MDSK):", name); /* disk is reserved minidisk */ - blocksize = label->cms.block_size; offset = label->cms.disk_offset; size = (label->cms.block_count - 1) * (blocksize >> 9); } else { printk("CMS1/%8s:", name); offset = (info->label_block + 1); - size = i_size >> 9; + size = label->cms.block_count + * (blocksize >> 9); } + put_partition(state, 1, offset*(blocksize >> 9), + size-offset*(blocksize >> 9)); } else { - /* - * Old style LNX1 or unlabeled disk - */ - if (strncmp(type, "LNX1", 4) == 0) - printk ("LNX1/%8s:", name); - else + if (strncmp(type, "LNX1", 4) == 0) { + printk("LNX1/%8s:", name); + if (label->lnx.ldl_version == 0xf2) { + fmt_size = label->lnx.formatted_blocks + * (blocksize >> 9); + } else if (!strcmp(info->type, "ECKD")) { + /* formated w/o large volume support */ + fmt_size = geo->cylinders * geo->heads + * geo->sectors * (blocksize >> 9); + } else { + /* old label and no usable disk geometry + * (e.g. 
DIAG) */ + fmt_size = i_size >> 9; + } + size = i_size >> 9; + if (fmt_size < size) + size = fmt_size; + offset = (info->label_block + 1); + } else { + /* unlabeled disk */ printk("(nonl)"); - offset = (info->label_block + 1); - size = i_size >> 9; - } - put_partition(state, 1, offset*(blocksize >> 9), + size = i_size >> 9; + offset = (info->label_block + 1); + } + put_partition(state, 1, offset*(blocksize >> 9), size-offset*(blocksize >> 9)); + } } else if (info->format == DASD_FORMAT_CDL) { /* * New style CDL formatted disk */ - unsigned int blk; + sector_t blk; int counter; /* @@ -166,7 +205,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* skip FMT4 / FMT5 / FMT7 labels */ if (f1.DS1FMTID == _ascebc['4'] || f1.DS1FMTID == _ascebc['5'] - || f1.DS1FMTID == _ascebc['7']) { + || f1.DS1FMTID == _ascebc['7'] + || f1.DS1FMTID == _ascebc['9']) { blk++; data = read_dev_sector(bdev, blk * (blocksize/512), @@ -174,8 +214,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) continue; } - /* only FMT1 valid at this point */ - if (f1.DS1FMTID != _ascebc['1']) + /* only FMT1 and 8 labels valid at this point */ + if (f1.DS1FMTID != _ascebc['1'] && + f1.DS1FMTID != _ascebc['8']) break; /* OK, we got valid partition data */ -- cgit v1.2.2