diff --git a/ab_scsi.d b/ab_scsi.d new file mode 100644 index 0000000..b31a210 --- /dev/null +++ b/ab_scsi.d @@ -0,0 +1,942 @@ +#!/usr/sbin/dtrace -qCs + +/* + * Chris.Gerhard@sun.com + * Joel.Buckley@sun.com + */ + +#pragma ident "@(#)scsi.d 1.18 09/03/20 SMI" + +/* + * SCSI logging via dtrace. + * + * See http://blogs.sun.com/chrisg/tags/scsi.d + * + * Usage: + * + * scsi.d [ @T time ][ hba [ X ]] + * + * With no arguments it logs every scsi packet out and as many returns + * as it can find. + * + * So to trace all the io going via instance 2 qus driver do: + * + * scsi.d qus 2 + * + * To do the same trace for 20 seconds do: + * + * scsi.d @T 20 qus 2 + * + * NOTE: the "@" is used instead of the more traditional "-" as dtrace will + * continue to parse arguments so would see "-T" as an argument to dtrace and + * produce a usage error. + * + * Temporarily, ie I intend to change this in the future, scsi.d is also + * taking some more arguments using the -D options as well. I suspect I will + * get rid of the bizarre @T options above and change to using -D completely + * but that will be some point in the future, if at all. + * + * The new options are: + * + * -D EXECNAME='"foo"' + * Which results scsi.d only reporting IO associated with the application "foo". + * -D PRINT_STACK + * Which results in scsi.d printing a kernel stack trace after every outgoing + * packet. + * -D QUIET + * Which results in none of the packets being printed. Kind of pointless + * without another option. + * -D PERF_REPORT + * Which results in a report of how long IOs took aggregated per HBA useful + * with -D QUIET to get performance statistics. + * -D TARGET_STATS + * aggregate the stats based on the target. + * -D LUN_STATS + * aggregate the stats based on the LUN. Requires TARGET_STATS + * -D DYNVARSIZE + * pass this value to the #pragma D option dynvarsize= option. + * -D HBA + * the name of the HBA we are interested in. + * -D MIN_LBA + * Only report logical blocks over this value + * -D MAX_LBA + * Only IOs to report logical blocks that are less than this value. + * -D REPORT_OVERTIME=N + * Only report IOs that have taken longer than this number of nanoseconds. + * This only stops the printing of the packets not the collection of + * statistics. + * There are some tuning options that take effect only when + * REPORT_OVERTIME is set. These are: + * -D NSPEC=N + * Set the number of speculations to this value. + * -D SPECSIZE=N + * Set the size of the speculaton buffer. This should be 200 * + * the size of NSPEC. + * -D CLEANRATE=N + * Specify the clean rate. + * + * Finally scsi.d will also now accept the dtrace -c and -p options to trace + * just the commands or process given. + * + * Since dtrace does not output in real time it is useful to sort the output + * of the script using sort -n to get the entries in chronological order. + * + * NOTE: This does not directly trace what goes onto the scsi bus or fibre, + * to do so would require this script have knowledge of every HBA that could + * ever be connected to a system. It traces the SCSI packets as they are + * passed from the target driver to the HBA in the SCSA layer and then back + * again. Although to get the packet when it is returned it guesses that the + * packet will be destroyed using scsi_destroy_pkt and not modified before it + * is. So far this has worked but there is no garauntee that it will work for + * all HBAs and target drivers in the future. + * + */ + +/* + * This can be used as a framework to report on any rountine that has + * a scsi_pkt passed into it. Search for "FRAMEWORK" to see where you + * have to add bits. + */ +#pragma D option quiet +#pragma D option defaultargs + +#ifdef DYNVARSIZE +#pragma D option dynvarsize=DYNVARSIZE +#endif + +#ifdef REPORT_OVERTIME + +#ifdef NSPEC +#pragma D option nspec=NSPEC +#else +#pragma D option nspec=1000 +#endif /* NSPEC */ + + +#ifdef SPECSIZE +#pragma D option specsize=SPECSIZE +#else +#pragma D option specsize=200000 +#endif /* SPECSIZE */ + +#ifdef CLEANRATE +#pragma D option cleanrate=CLEANRATE +#endif /* CLEANRATE */ + +#endif /* REPORT_OVERTIME */ + +/* + * Have to include scsi_types.h so that in releases where scsi_address + * contains a union and #defines for a_target, a_lun & a_sublun they get + * picked up. + */ +#include + +/* + * Now work around CR 6798038. Depending on how that CR is fixed this + * workaround may have to be revisited so that the script will work in + * all cases. + */ +#ifdef a_lun +#undef a_lun +#define my_alun a.spi.a_lun +#else +#define my_alun a_lun +#endif + +#ifdef a_target +#undef a_target +#define my_atarget a.spi.a_target +#else +#define my_atarget a_target +#endif + +/* + * Just for completeness. The script does not use sublun so these are not + * needed but do no harm. + */ +#ifdef a_sublun +#undef a_sublun +#define my_asublun a.spi.a_sublun +#else +#define my_asublun a_sublun +#endif + +/* + * Set both lba & len to 64Bit values, to avoid type mismatch later... + * + * The following does not function due to DTrace insufficient registers... + * + * #define INT64(A, B, C, D, E, F, G, H) ( \ + * (((A) & 0x0ffL) << 56) | \ + * (((B) & 0x0ffL) << 48) | \ + * (((C) & 0x0ffL) << 40) | \ + * (((D) & 0x0ffL) << 32) | \ + * (((E) & 0x0ffL) << 24) | \ + * (((F) & 0x0ffL) << 16) | \ + * (((G) & 0x0ffL) << 8) | \ + * ((H) & 0x0ffL)) + * + * Instead, calculate lower & upper values, then pass them into this macro: + */ +#define INT64(A, B) ( \ + (((A) & 0x0ffffffffLL) << 32) | \ + ((B) & 0x0ffffffffL)) + +#define INT32(A, B, C, D) ( \ + (((A) & 0x0ffL) << 24) | \ + (((B) & 0x0ffL) << 16) | \ + (((C) & 0x0ffL) << 8) | \ + ((D) & 0x0ffL)) +#define INT16(A, B) ( \ + (((A) & 0x0ffL) << 8) | \ + ((B) & 0x0ffL)) +#define INT8(A) ( \ + ((A) & 0x0ffL)) + +/* #define A_TO_TRAN(ap) ((ap)->a_hba_tran) */ +#define P_TO_TRAN(pkt) ((pkt)->pkt_address.a_hba_tran) +/* #define P_TO_ADDR(pkt) (&((pkt)->pkt_address)) */ +#define P_TO_DEVINFO(pkt) ((struct dev_info *)(P_TO_TRAN(pkt)->tran_hba_dip)) + +#define DEV_NAME(pkt) \ + stringof(`devnamesp[P_TO_DEVINFO(pkt)->devi_major].dn_name) /* ` */ + +#define DEV_INST(pkt) (P_TO_DEVINFO(pkt)->devi_instance) + +#ifdef MIN_BLOCK +#define MIN_TEST && this->lba >= (MIN_BLOCK) +#else +#define MIN_TEST +#endif +#ifdef MAX_BLOCK +#define MAX_TEST && (this->lba <= ((MAX_BLOCK) + this->len)) +#else +#define MAX_TEST +#endif + +#define ARGUMENT_TEST (this->pkt && (hba_set == 0 || \ + hba == DEV_NAME(this->pkt) && \ + (inst_set == 0 || inst == DEV_INST(this->pkt))) MIN_TEST MAX_TEST) + +/* + * PRINT_DEV_FROM_PKT + * + * From a scsi_pkt get the name of the driver and the instance of the + * hba. + */ +#define PRINT_DEV_FROM_PKT(pkt) \ + printf("%s%d:", DEV_NAME(pkt), DEV_INST(pkt)) + +#define PRINT_TIMESTAMP() printf("%5.5d.%9.9d ", \ + (timestamp - script_start_time)/1000000000, \ + (timestamp - script_start_time)%1000000000); + +/* + * Look away now. + * + * This is just hackery to get around me being so stubborn I + * don't want to wrap this in a shell script. + */ +#ifdef HBA +BEGIN { + hba = HBA; + hba_set = 1; + timeout = 0; + inst_set = 0; + inst = 0; +} +#else + +#define TIMEOUT_SELECT "@T" + +#define DO_ARGS(N, NS, M, MS) \ +BEGIN \ +/ hba_set == 0 && NS != "" && NS != TIMEOUT_SELECT / \ +{ \ + hba = NS; \ + hba_set = 1; \ +} \ +BEGIN \ +/ hba_set == 1 && inst_set == 0 && MS != "" / \ +{ \ + inst = M; \ + inst_set = 1; \ +} \ + +BEGIN +/ $$1 == TIMEOUT_SELECT / +{ + timeout = $2; +} + +#endif + +BEGIN +/ $$target != "" / +{ + target = $target; +} + +BEGIN +{ +#ifdef EXECNAME + scsi_d_target_execname = EXECNAME; +#else + scsi_d_target_execname = ""; +#endif +} +#ifndef HBA +DO_ARGS($1, $$1, $2, $$2) +DO_ARGS($3, $$3, $4, $$4) +#endif + +/* + * You can open your eyes again now. + */ + +BEGIN { +#ifdef REPORT_OVERTIME + printf("Only reporting IOs longer than %dns\n", REPORT_OVERTIME); +#endif + printf("Hit Control C to interrupt\n"); + script_start_time = timestamp; + end_time = timestamp + (timeout * 1000000000); + + scsi_ops[0x000, 0x0] = "TEST_UNIT_READY"; + scsi_ops[0x001, 0x0] = "REZERO_UNIT_or_REWIND"; + scsi_ops[0x003, 0x0] = "REQUEST_SENSE"; + scsi_ops[0x004, 0x0] = "FORMAT_UNIT"; + scsi_ops[0x005, 0x0] = "READ_BLOCK_LIMITS"; + scsi_ops[0x006, 0x0] = "Unknown(06)"; + scsi_ops[0x007, 0x0] = "REASSIGN_BLOCKS"; + scsi_ops[0x008, 0x0] = "READ(6)"; + scsi_ops[0x009, 0x0] = "Unknown(09)"; + scsi_ops[0x00a, 0x0] = "WRITE(6)"; + scsi_ops[0x00b, 0x0] = "SEEK(6)"; + scsi_ops[0x00c, 0x0] = "Unknown(0c)"; + scsi_ops[0x00d, 0x0] = "Unknown(0d)"; + scsi_ops[0x00e, 0x0] = "Unknown(0e)"; + scsi_ops[0x00f, 0x0] = "READ_REVERSE"; + scsi_ops[0x010, 0x0] = "WRITE_FILEMARK"; + scsi_ops[0x011, 0x0] = "SPACE"; + scsi_ops[0x012, 0x0] = "INQUIRY"; + scsi_ops[0x013, 0x0] = "VERIFY"; + scsi_ops[0x014, 0x0] = "Unknown(14)"; + scsi_ops[0x015, 0x0] = "MODE_SELECT(6)"; + scsi_ops[0x016, 0x0] = "RESERVE(6)"; + scsi_ops[0x017, 0x0] = "RELEASE(6)"; + scsi_ops[0x018, 0x0] = "COPY"; + scsi_ops[0x019, 0x0] = "ERASE(6)"; + scsi_ops[0x01a, 0x0] = "MODE_SENSE(6)"; + scsi_ops[0x01b, 0x0] = "START_STOP_UNIT"; + scsi_ops[0x01c, 0x0] = "RECIEVE_DIAGNOSTIC_RESULTS"; + scsi_ops[0x01d, 0x0] = "SEND_DIAGNOSTIC"; + scsi_ops[0x01e, 0x0] = "PREVENT_ALLOW_MEDIUM_REMOVAL"; + scsi_ops[0x01f, 0x0] = "Unknown(1f)"; + scsi_ops[0x020, 0x0] = "Unknown(20)"; + scsi_ops[0x021, 0x0] = "Unknown(21)"; + scsi_ops[0x022, 0x0] = "Unknown(22)"; + scsi_ops[0x023, 0x0] = "READ_FORMAT_CAPACITY"; + scsi_ops[0x024, 0x0] = "Unknown(24)"; + scsi_ops[0x025, 0x0] = "READ_CAPACITY(10)"; + scsi_ops[0x026, 0x0] = "Unknown(26)"; + scsi_ops[0x027, 0x0] = "Unknown(27)"; + scsi_ops[0x028, 0x0] = "READ(10)"; + scsi_ops[0x02a, 0x0] = "WRITE(10)"; + scsi_ops[0x02b, 0x0] = "SEEK(10)_or_LOCATE(10)"; + scsi_ops[0x02e, 0x0] = "WRITE_AND_VERIFY(10)"; + scsi_ops[0x02f, 0x0] = "VERIFY(10)"; + scsi_ops[0x030, 0x0] = "SEARCH_DATA_HIGH"; + scsi_ops[0x031, 0x0] = "SEARCH_DATA_EQUAL"; + scsi_ops[0x032, 0x0] = "SEARCH_DATA_LOW"; + scsi_ops[0x033, 0x0] = "SET_LIMITS(10)"; + scsi_ops[0x034, 0x0] = "PRE-FETCH(10)"; + scsi_ops[0x035, 0x0] = "SYNCHRONIZE_CACHE(10)"; + scsi_ops[0x036, 0x0] = "LOCK_UNLOCK_CACHE(10)"; + scsi_ops[0x037, 0x0] = "READ_DEFECT_DATA(10)"; + scsi_ops[0x039, 0x0] = "COMPARE"; + scsi_ops[0x03a, 0x0] = "COPY_AND_WRITE"; + scsi_ops[0x03b, 0x0] = "WRITE_BUFFER"; + scsi_ops[0x03c, 0x0] = "READ_BUFFER"; + scsi_ops[0x03e, 0x0] = "READ_LONG"; + scsi_ops[0x03f, 0x0] = "WRITE_LONG"; + scsi_ops[0x040, 0x0] = "CHANGE_DEFINITION"; + scsi_ops[0x041, 0x0] = "WRITE_SAME(10)"; + scsi_ops[0x04c, 0x0] = "LOG_SELECT"; + scsi_ops[0x04d, 0x0] = "LOG_SENSE"; + scsi_ops[0x050, 0x0] = "XDWRITE(10)"; + scsi_ops[0x051, 0x0] = "XPWRITE(10)"; + scsi_ops[0x052, 0x0] = "XDREAD(10)"; + scsi_ops[0x053, 0x0] = "XDWRITEREAD(10)"; + scsi_ops[0x055, 0x0] = "MODE_SELECT(10)"; + scsi_ops[0x056, 0x0] = "RESERVE(10)"; + scsi_ops[0x057, 0x0] = "RELEASE(10)"; + scsi_ops[0x05a, 0x0] = "MODE_SENSE(10)"; + scsi_ops[0x05e, 0x0] = "PERSISTENT_RESERVE_IN"; + scsi_ops[0x05f, 0x0] = "PERSISTENT_RESERVE_OUT"; + scsi_ops[0x07f, 0x0] = "Variable_Length_CDB"; + scsi_ops[0x07f, 0x3] = "XDREAD(32)"; + scsi_ops[0x07f, 0x4] = "XDWRITE(32)"; + scsi_ops[0x07f, 0x6] = "XPWRITE(32)"; + scsi_ops[0x07f, 0x7] = "XDWRITEREAD(32)"; + scsi_ops[0x07f, 0x9] = "READ(32)"; + scsi_ops[0x07f, 0xb] = "WRITE(32)"; + scsi_ops[0x07f, 0xa] = "VERIFY(32)"; + scsi_ops[0x07f, 0xc] = "WRITE_AND_VERIFY(32)"; + scsi_ops[0x080, 0x0] = "XDWRITE_EXTENDED(16)"; + scsi_ops[0x081, 0x0] = "REBUILD(16)"; + scsi_ops[0x082, 0x0] = "REGENERATE(16)"; + scsi_ops[0x083, 0x0] = "EXTENDED_COPY"; + scsi_ops[0x086, 0x0] = "ACCESS_CONTROL_IN"; + scsi_ops[0x087, 0x0] = "ACCESS_CONTROL_OUT"; + scsi_ops[0x088, 0x0] = "READ(16)"; + scsi_ops[0x08a, 0x0] = "WRITE(16)"; + scsi_ops[0x08c, 0x0] = "READ_ATTRIBUTES"; + scsi_ops[0x08d, 0x0] = "WRITE_ATTRIBUTES"; + scsi_ops[0x08e, 0x0] = "WRITE_AND_VERIFY(16)"; + scsi_ops[0x08f, 0x0] = "VERIFY(16)"; + scsi_ops[0x090, 0x0] = "PRE-FETCH(16)"; + scsi_ops[0x091, 0x0] = "SYNCHRONIZE_CACHE(16)"; + scsi_ops[0x092, 0x0] = "LOCK_UNLOCK_CACHE(16)_or_LOCATE(16)"; + scsi_ops[0x093, 0x0] = "WRITE_SAME(16)_or_ERASE(16)"; + scsi_ops[0x09e, 0x0] = "SERVICE_IN_or_READ_CAPACITY(16)"; + scsi_ops[0x0a0, 0x0] = "REPORT_LUNS"; + scsi_ops[0x0a3, 0x0] = "MAINTENANCE_IN_or_REPORT_TARGET_PORT_GROUPS"; + scsi_ops[0x0a4, 0x0] = "MAINTENANCE_OUT_or_SET_TARGET_PORT_GROUPS"; + scsi_ops[0x0a7, 0x0] = "MOVE_MEDIUM"; + scsi_ops[0x0a8, 0x0] = "READ(12)"; + scsi_ops[0x0aa, 0x0] = "WRITE(12)"; + scsi_ops[0x0ae, 0x0] = "WRITE_AND_VERIFY(12)"; + scsi_ops[0x0af, 0x0] = "VERIFY(12)"; + scsi_ops[0x0b3, 0x0] = "SET_LIMITS(12)"; + scsi_ops[0x0b4, 0x0] = "READ_ELEMENT_STATUS"; + scsi_ops[0x0b7, 0x0] = "READ_DEFECT_DATA(12)"; + scsi_ops[0x0ba, 0x0] = "REDUNDANCY_GROUP_IN"; + scsi_ops[0x0bb, 0x0] = "REDUNDANCY_GROUP_OUT"; + scsi_ops[0x0bc, 0x0] = "SPARE_IN"; + scsi_ops[0x0bd, 0x0] = "SPARE_OUT"; + scsi_ops[0x0be, 0x0] = "VOLUME_SET_IN"; + scsi_ops[0x0bf, 0x0] = "VOLUME_SET_OUT"; + scsi_ops[0x0d0, 0x0] = "EXPLICIT_LUN_FAILOVER"; + scsi_ops[0x0f1, 0x0] = "STOREDGE_CONTROLLER"; + + scsi_reasons[0] = "COMPLETED"; + scsi_reasons[1] = "INCOMPLETE"; + scsi_reasons[2] = "DMA_ERR"; + scsi_reasons[3] = "TRAN_ERR"; + scsi_reasons[4] = "RESET"; + scsi_reasons[5] = "ABORTED"; + scsi_reasons[6] = "TIMEOUT"; + scsi_reasons[7] = "DATA_OVERRUN"; + scsi_reasons[8] = "COMMAND_OVERRUN"; + scsi_reasons[9] = "STATUS_OVERRUN"; + scsi_reasons[10] = "Bad_Message"; + scsi_reasons[11] = "No_Message_Out"; + scsi_reasons[12] = "XID_Failed"; + scsi_reasons[13] = "IDE_Failed"; + scsi_reasons[14] = "Abort_Failed"; + scsi_reasons[15] = "Reject_Failed"; + scsi_reasons[16] = "Nop_Failed"; + scsi_reasons[17] = "Message_Parity_Error_Failed"; + scsi_reasons[18] = "Bus_Device_Reset_Failed"; + scsi_reasons[19] = "Identify_Message_Rejected"; + scsi_reasons[20] = "Unexpected_Bus_free"; + scsi_reasons[21] = "Tag_Rejected"; + scsi_reasons[22] = "TERMINATED"; + scsi_reasons[24] = "Device_Gone"; + + scsi_state[0] = "Success"; + scsi_state[0x2] = "Check_Condition"; + scsi_state[0x4] = "Condition_Met"; + scsi_state[0x8] = "Busy"; + scsi_state[0x10] = "Intermidiate"; + scsi_state[0x14] = "Intermidiate-Condition_Met"; + scsi_state[0x18] = "Reservation_Conflict"; + scsi_state[0x22] = "Command_Terminated"; + scsi_state[0x28] = "Queue_Full"; + scsi_state[0x30] = "ACA_Active"; + + +/* FRAMEWORK:- add your special string to this array */ + names[0] = "->"; /* Command sent */ + names[1] = "<-"; /* Command returning */ + names[2] = "ILL"; /* Illegal Request spotted */ +} + +fbt:scsi:scsi_transport:entry, +fbt:scsi:scsi_destroy_pkt:entry +/ timeout != 0 && end_time < timestamp / +{ + exit(0); +} + +/* + * relying on scsi_destroy_pkt to get the response is a + * hack that may or may not always work depending on HBA + * and target drivers. + * With arc case: PSARC/2009/033 I can use scsi_hba_pkt_comp(). When that + * is out in the wild it should be used. + */ +/* + * If in any of these probes this->pkt is initialized to zero + * the knock on effect is none of the other probes for this clause further + * down get run. + * + * Do not just comment the probes out. Clause local variables are not + * initialized by default so you will see run time dtrace errors if you do + * where other variables do not get set. + */ + +#define PROBE_SWITCH \ + ((target == 0 || pid == target ) && \ + (scsi_d_target_execname == "" || scsi_d_target_execname == execname)) + +fbt:scsi:scsi_transport:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:scsi:scsi_transport:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +fbt:scsi:scsi_destroy_pkt:entry +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +/* + * FRAMEWORK:- create your entry probe and make this->pkt point to the + * argument that has the scsi_pkt. eg add one like this: + * + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:*sd:*sd_sense_key_illegal_request:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg3; +} + +/* FRAMEWORK:- Add your probe name to the list for CDB_PROBES */ +#define CDB_PROBES \ +fbt:scsi:scsi_transport:entry, \ +fbt:*sd:*sd_sense_key_illegal_request:entry + +#define ENTRY_PROBES \ +CDB_PROBES, \ +fbt:scsi:scsi_destroy_pkt:entry + +ENTRY_PROBES +/ this->pkt / +{ + this->cdb = (uchar_t *)this->pkt->pkt_cdbp; + this->scb = (uchar_t *)this->pkt->pkt_scbp; + this->group = ((this->cdb[0] & 0xe0) >> 5); + this->lbalen = 0; +} +/* + * 5.11 allows this->name = "->" but vanilla 5.10 does not so I'm sticking + * to the setting a variable and then choosing the string from the global + * array. + */ +fbt:scsi:scsi_transport:entry +/ this->pkt / +{ + start_time[this->scb] = timestamp; + this->name = 0; +} + +/* + * If we are filtering on target pid or execname then only report + * the return packets that we know we sent. + */ +fbt:scsi:scsi_destroy_pkt:entry +/ start_time[this->scb] == 0 && ( target != 0 || scsi_d_target_execname != "") / +{ + this->pkt = 0; +} + +fbt:scsi:scsi_destroy_pkt:entry +/ this->pkt / +{ + this->name = 1; +} +/* + * FRAMEWORK: Add your probe and set this->name to the offset of + * your string in the names table. + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ this->pkt / +{ + this->name = 2; +} + +/* + * Now cope with the differnent CDB layouts based on the group. + * + * Trying to use x = this->group == 0 ? y : z; results in D running out + * of registers so it is all done using seperate probes. + * + * Group Listing: + * + Group 0: 6Byte CDBs: scsi_ops[0x000] thru scsi_ops[0x01f] + * + Group 1: 10Byte CDBs: scsi_ops[0x020] thru scsi_ops[0x03f] + * + Group 2: 10Byte CDBs: scsi_ops[0x040] thru scsi_ops[0x05f] + * + Group 3: Variable Length CDBs: scsi_ops[0x060] thru scsi_ops[0x07f] + * + Group 4: 16Byte CDBs: scsi_ops[0x080] thru scsi_ops[0x09f] + * + Group 5: 12Byte CDBs: scsi_ops[0x0a0] thru scsi_ops[0x0bf] + * Group 6: Vendor Specific CDBs: scsi_ops[0x0c0] thru scsi_ops[0x0df] + * Group 7: Vendor Specific CDBs: scsi_ops[0x0e0] thru scsi_ops[0x0ff] + * + * The groups with a leading plus sign "+" are of importance. + */ +ENTRY_PROBES +/ this->pkt && this->group == 0 / +{ + this->lba = INT32(0, (this->cdb[1] & 0x1f), + this->cdb[2], this->cdb[3]); + this->lbalen = 6; + this->len = INT8(this->cdb[4]); + this->control = this->cdb[5]; + this->sa = 0; + this->cdblen = 6; +} + +ENTRY_PROBES +/ this->pkt && this->group == 1 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 2 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 3 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[12], this->cdb[13], + this->cdb[14], this->cdb[15]); + this->lbaLower = INT32(this->cdb[16], this->cdb[17], + this->cdb[18], this->cdb[19]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[28], this->cdb[29], + this->cdb[30], this->cdb[31]); + this->control = this->cdb[1]; + this->sa = INT16(this->cdb[8], this->cdb[9]); + this->cdblen = 32 +} + +ENTRY_PROBES +/ this->pkt && this->group == 4 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbaLower = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[10], this->cdb[11], + this->cdb[12], this->cdb[13]); + this->control = this->cdb[15]; + this->sa = 0; + this->cdblen = 16; +} + +ENTRY_PROBES +/ this->pkt && this->group == 5 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->control = this->cdb[11]; + this->sa = 0; + this->cdblen = 12; +} +/* + * We don't know the format of the group 6 and 7 commands as they are + * vendor specific. So I just print the cdb up to a maximum of 32 bytes. + */ +ENTRY_PROBES +/ this->pkt && this->group > 5 / +{ + this->lba = 0; + this->lbalen = 0; + this->len = 0; + this->control = 0; + this->sa = 0; +/* + * At some point pkt_cdblen will make it into 5.10 but I can't at this moment + * workout how I can test this before the script compiles (without wrapping + * this in a ksh script which as I have mentioned above I'm not going to do). + */ +#ifndef __SunOS_5_10 + this->cdblen = this->pkt->pkt_cdblen; +#else + this->cdblen = 32; +#endif + +} + +/* + * The guts of the script. Report what we have if we are required. + * First the stuff we do for both outgoing and incoming. + */ + +ENTRY_PROBES +/ !ARGUMENT_TEST / +{ + this->arg_test_passed = 0; +} + +ENTRY_PROBES +/ ARGUMENT_TEST / +{ + this->arg_test_passed = 1; +} + +/* + * If only reporting IO that takes more then a certan time then use + * speculations to capture the outgoing data and then only commit the + * speculation if the packet takes too long. + */ + +#ifdef REPORT_OVERTIME + +#define SPECULATE speculate(specs[this->scb]); + +ENTRY_PROBES +/ this->arg_test_passed && specs[this->scb] == 0 / +{ + specs[this->scb] = speculation(); +} + +#else + +#define SPECULATE + +#endif /* REPORT_OVERTIME */ + +#ifndef QUIET +ENTRY_PROBES +/ this->arg_test_passed / +{ + SPECULATE + PRINT_TIMESTAMP(); + PRINT_DEV_FROM_PKT(this->pkt); + printf("%s 0x%2.2x %9s address %2.2d:%2.2d, lba 0x%*.*x, ", + names[this->name], + this->cdb[0], + scsi_ops[(uint_t)this->cdb[0], this->sa] != 0 ? + scsi_ops[(uint_t)this->cdb[0], this->sa] : "Unknown", + this->pkt->pkt_address.my_atarget, + this->pkt->pkt_address.my_alun, + this->lbalen, this->lbalen, + this->lba); + printf("len 0x%6.6x, control 0x%2.2x timeout %d CDBP %p", + this->len, this->control, + this->pkt->pkt_time, this->cdb); +} + +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(" %d %s(%d)", this->arg_test_passed, execname, pid); +} + +/* + * For those who are just not happy without some raw hex numbers. Print the + * raw cdb. + */ +CDB_PROBES +/ this->arg_test_passed == 1 && this->cdblen / +{ + SPECULATE + printf(" cdb(%d) ", this->cdblen); +} + +/* + * Now print each byte out of the cdb. + */ + +#define PRINT_CDB(N) \ +CDB_PROBES \ +/ this->arg_test_passed == 1 && this->cdblen > N/ \ +{ \ + SPECULATE; \ + printf("%2.2x", this->cdb[N]) \ +} +#else /* QUIET */ +#define PRINT_CDB(N) +#endif /* QUIET */ + +PRINT_CDB(0) +PRINT_CDB(1) +PRINT_CDB(2) +PRINT_CDB(3) +PRINT_CDB(4) +PRINT_CDB(5) +PRINT_CDB(6) +PRINT_CDB(7) +PRINT_CDB(8) +PRINT_CDB(9) +PRINT_CDB(10) +PRINT_CDB(11) +PRINT_CDB(12) +PRINT_CDB(13) +PRINT_CDB(14) +PRINT_CDB(15) +PRINT_CDB(16) +PRINT_CDB(17) +PRINT_CDB(18) +PRINT_CDB(19) +PRINT_CDB(20) +PRINT_CDB(21) +PRINT_CDB(22) +PRINT_CDB(23) +PRINT_CDB(24) +PRINT_CDB(25) +PRINT_CDB(26) +PRINT_CDB(27) +PRINT_CDB(28) +PRINT_CDB(29) +PRINT_CDB(30) +PRINT_CDB(31) + +/* + * Now the result on the incoming. + */ +#ifdef PERF_REPORT +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && start_time[this->scb] != 0 / +{ + @[DEV_NAME(this->pkt), DEV_INST(this->pkt) +#ifdef TARGET_STATS + , this->pkt->pkt_address.a_target +#ifdef LUN_STATS + , this->pkt->pkt_address.a_lun +#endif +#endif + ] = quantize(timestamp - start_time[this->scb]); +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + this->state = *(this->scb); + this->state = this->state & 0x3E; +} +#ifndef QUIET +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(", reason 0x%x (%s) pkt_state 0x%x state 0x%x %s Time %dus\n", + this->pkt->pkt_reason, + scsi_reasons[this->pkt->pkt_reason] != 0 ? + scsi_reasons[this->pkt->pkt_reason] : "Unknown", + this->pkt->pkt_state, this->state, scsi_state[this->state] != 0 ? + scsi_state[this->state] : "Unknown", + start_time[this->scb] != 0 ? + (timestamp - start_time[this->scb])/1000 : 0); +} +#endif + +#ifndef QUIET +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf("\n"); +} +#endif +/* + * printing stacks of where we are called from can be useful however it + * does mean you can't just pipe the output through sort(1). Does not work + * with overtime as you can't put a stack into a speculation. + */ +#ifdef PRINT_STACK +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + stack(10); +} +#endif + +#ifdef REPORT_OVERTIME +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +((timestamp - start_time[this->scb]) > REPORT_OVERTIME && start_time[this->scb]) / +{ + SPECULATE + commit(specs[this->scb]); + specs[this->scb] = 0; +} +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +! ((timestamp - start_time[this->scb]) > REPORT_OVERTIME) / +{ + discard(specs[this->scb]); + specs[this->scb] = 0; +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + start_time[this->scb] = 0; +} + diff --git a/arc_adjust.v2.d b/arc_adjust.v2.d index 78561ab..005a599 100644 --- a/arc_adjust.v2.d +++ b/arc_adjust.v2.d @@ -2,32 +2,34 @@ #pragma D option quiet arc_adjust:entry +/* Adjusted slightly to work on smartOS. Had to replace global `arc_meta_used var with +value from kstat data: `arc_stats.arcstat_meta_used.value.ui64. */ { - self->ts = walltimestamp; - printf("%Y 1836: adjustment = MIN( %d, %d)\n", walltimestamp, - (int64_t)(`arc_stats.arcstat_size.value.ui64 - `arc_stats.arcstat_c.value.ui64), - (int64_t)(`arc_anon->arcs_size + `arc_mru->arcs_size + `arc_meta_used - `arc_stats.arcstat_p.value.ui64)); - printf("arc_size = %d, arc_c = %d, arc_anon.size = %d, arc_mru.size = %d, arc_meta_used = %d, arc_p = %d\n", - `arc_stats.arcstat_size.value.ui64, - `arc_stats.arcstat_c.value.ui64, - `arc_anon->arcs_size, - `arc_mru->arcs_size, - `arc_meta_used, - `arc_stats.arcstat_p.value.ui64); - printf("arc_mfu.size = %d, arc_mfu_ghost.size = %d, arc_mru_ghost.size = %d, arc_l2c_only.size = %d\n", - `arc_mfu->arcs_size, - `arc_mfu_ghost->arcs_size, - `arc_mru_ghost->arcs_size, - `arc_l2c_only->arcs_size); + self->ts = walltimestamp; + printf("%Y 1836: adjustment = MIN( %d, %d)\n", walltimestamp, + (int64_t)(`arc_stats.arcstat_size.value.ui64 - `arc_stats.arcstat_c.value.ui64), + (int64_t)(`arc_anon->arcs_size + `arc_mru->arcs_size + `arc_stats.arcstat_meta_used.value.ui64 - `arc_stats.arcstat_p.value.ui64)); + printf("arc_size = %d, arc_c = %d, arc_anon.size = %d, arc_mru.size = %d, arc_meta_used = %d, arc_p = %d\n", + `arc_stats.arcstat_size.value.ui64, + `arc_stats.arcstat_c.value.ui64, + `arc_anon->arcs_size, + `arc_mru->arcs_size, + `arc_stats.arcstat_meta_used.value.ui64,, + `arc_stats.arcstat_p.value.ui64); + printf("arc_mfu.size = %d, arc_mfu_ghost.size = %d, arc_mru_ghost.size = %d, arc_l2c_only.size = %d\n", + `arc_mfu->arcs_size, + `arc_mfu_ghost->arcs_size, + `arc_mru_ghost->arcs_size, + `arc_l2c_only->arcs_size); } arc_shrink:entry { - printf("%Y 2085: to_free = MAX( %d, %d)\n", walltimestamp, - `arc_stats.arcstat_c.value.ui64 >> `arc_shrink_shift, `needfree*4096); + printf("%Y 2085: to_free = MAX( %d, %d)\n", walltimestamp, + `arc_stats.arcstat_c.value.ui64 >> `arc_shrink_shift, `needfree*4096); } arc_adjust:return { - printf("Returned from arc_adjust started at %Y %d ms later.\n", self->ts, (walltimestamp - self->ts)/1000000); + printf("Returned from arc_adjust started at %Y %d ms later.\n", self->ts, (walltimestamp - self->ts)/1000000); } diff --git a/arcaccess.d b/arcaccess.d new file mode 100644 index 0000000..709d921 --- /dev/null +++ b/arcaccess.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("lbolt rate is %d Hertz.\n", `hz); + printf("Tracing lbolts between ARC accesses..."); +} + +fbt::arc_access:entry +{ + self->ab = args[0]; + self->lbolt = args[0]->b_arc_access; +} + +fbt::arc_access:return +/self->lbolt/ +{ + @ = quantize(self->ab->b_arc_access - self->lbolt); + self->ab = 0; + self->lbolt = 0; +} \ No newline at end of file diff --git a/cifsops.d b/cifsops.d new file mode 100644 index 0000000..5356e76 --- /dev/null +++ b/cifsops.d @@ -0,0 +1,21 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + trace("Tracing CIFS operations... Interval 5 secs.\n"); +} + +smb:::op-* +{ + @ops[args[0]->ci_remote, probename] = count(); +} + +profile:::tick-5sec, +dtrace:::END +{ + printf("\n %-32s %-30s %8s\n", "Client", "Operation", "Count"); + printa(" %-32s %-30s %@8d\n", @ops); + trunc(@ops); +} diff --git a/counter.d b/counter.d new file mode 100644 index 0000000..f0bdd3c --- /dev/null +++ b/counter.d @@ -0,0 +1,16 @@ +/* +* Count off and report the number of seconds elapsed +*/ +dtrace:::BEGIN + { + i = 0; + } +profile:::tick-1sec + { + i = i + 1; + trace(i); + } +dtrace:::END + { + trace(i); + } \ No newline at end of file diff --git a/diskioerr.d b/diskioerr.d new file mode 100644 index 0000000..8436c66 --- /dev/null +++ b/diskioerr.d @@ -0,0 +1,82 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=8m +#pragma D option switchrate=10hz +/* + Copyright 2012 Sam Zaydel - RackTop Systems + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +This script is intended to track IO where some buffer error was registered, +by looking for the B_ERROR flag in the bufinfo_t struct. +*/ + + + +dtrace:::BEGIN { + printf("Tracing... Hit Ctrl-C to end.\n"); +} +io:::start { + start_time[arg0] = timestamp; + fire = 1; +} +io:::done / (args[0]->b_flags & B_READ) && (args[0]->b_flags & B_ERROR) && (this->start = start_time[arg0]) / { + + @errors[args[1]->dev_pathname, "Error Code", args[0]->b_error] = count(); + this->delta = (timestamp - this->start) / 1000; +/* @plots["Read I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Read I/O, us"] = avg(this->delta); + @avgbytes["Avg Read bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Read bytes/us"] = avg(args[0]->b_bcount / this->delta); +*/ + start_time[arg0] = 0; + error = 1; +} + +io:::done / !(args[0]->b_flags & B_READ) && (args[0]->b_flags & B_ERROR) && (this->start = start_time[arg0]) / { + + @errors[args[1]->dev_pathname, "Error Code",args[0]->b_error] = count(); + this->delta = (timestamp - this->start) / 1000; +/* @plots["Write I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Write I/O, us"] = avg(this->delta); + @avgbytes["Avg Write bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Write bytes/us"] = avg(args[0]->b_bcount / this->delta); +*/ + start_time[arg0] = 0; + self->error = 1; + +} + +profile:::tick-5sec / error == 1 / { + printa("%s %s %d %@d\n", @errors); trunc(@errors); +/* printf("%Y\n", walltimestamp); + printa("%s %s %s :: %s\n (%d,%d), us:\n%@d\n", @plots); + printa("%18s %44@d\n", @avgs); + printa("%18s %44@d\n", @avgbytes); + printa("%18s %44@d\n", @perio_t); + printf("%s\n",""); + trunc(@avgs); trunc(@avgbytes); trunc(@perio_t); +*/ + error = 0; +} diff --git a/disklatency.d b/disklatency.d new file mode 100644 index 0000000..46169f6 --- /dev/null +++ b/disklatency.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @[args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor] = + quantize(this->delta); + start_time[arg0] = 0; +} + +dtrace:::END +{ + printa(" %s (%d,%d), us:\n%@d\n", @); +} \ No newline at end of file diff --git a/disklatency2.d b/disklatency2.d new file mode 100644 index 0000000..54e75f4 --- /dev/null +++ b/disklatency2.d @@ -0,0 +1,38 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start + +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @[execname, args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + start_time[arg0] = 0; +} + +profile:::tick-10sec + +{ + printa("%s %s :: %s\n (%d,%d), us:\n%@d\n ", @); + printf("%Y\n", walltimestamp); +} + +/*dtrace:::END +{ + printa("%s %s :: %s\n (%d,%d), us:\n%@d\n ", @); +} +*/ diff --git a/disklatencygen.d b/disklatencygen.d new file mode 100644 index 0000000..fe0ff50 --- /dev/null +++ b/disklatencygen.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @avgs[args[1]->dev_pathname,args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor] = + avg(this->delta); + start_time[arg0] = 0; +} + +tick-1sec +{ + printa("%s\t%s\t%d\t%d\t%@d\n", @avgs); +} diff --git a/disklatencyrw.d b/disklatencyrw.d new file mode 100644 index 0000000..35438f7 --- /dev/null +++ b/disklatencyrw.d @@ -0,0 +1,103 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=4m +#pragma D option switchrate=10hz +/* + Copyright 2012 Sam Zaydel - RackTop Systems + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Script will collect per device IO statistics, and will display a distribution +graph similar to one below, along with statistics printed at the bottom. This is a +per-device output. + +Read I/O, us sched sd4 :: /devices/pci@0,0/pci15ad,1976@10/sd@3,0:a + (292,256), us: + + value ------------- Distribution ------------- count + 8 | 0 + 16 | 6 + 32 |@ 10 + 64 | 0 + 128 |@@ 23 + 256 |@ 8 + 512 | 0 + 1024 | 0 + 2048 |@ 16 + 4096 |@ 10 + 8192 |@@@@@@@@ 97 + 16384 |@@@@@@@@@@@@ 151 + 32768 |@@@@@@@@@@ 125 + 65536 |@@@ 39 + 131072 | 4 + 262144 | 2 + 524288 | 0 + 1048576 | 2 + 2097152 | 0 + + Avg Write I/O, us 890 + Avg Read I/O, us 31407 + Avg Write bytes 8068 + Avg Read bytes 131072 + Avg Read bytes/us 6 +Avg Write bytes/us 7 + +*/ + +dtrace:::BEGIN { + printf("Tracing... Hit Ctrl-C to end.\n"); +} +io:::start { + start_time[arg0] = timestamp; + fire = 1; +} +io:::done / (args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / { + this->delta = (timestamp - this->start) / 1000; + @plots["Read I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Read I/O, us"] = avg(this->delta); + @avgbytes["Avg Read bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Read bytes/us"] = avg(args[0]->b_bcount / this->delta); + start_time[arg0] = 0; +} + +io:::done / !(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / { + this->delta = (timestamp - this->start) / 1000; + @plots["Write I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Write I/O, us"] = avg(this->delta); + @avgbytes["Avg Write bytes"] = avg(args[0]->b_bcount); + /* self->w_lat = args[0]->b_bcount / self->delta; */ + @perio_t["Avg Write bytes/us"] = avg(args[0]->b_bcount / this->delta); + start_time[arg0] = 0; +} + +profile:::tick-5sec / fire > 0 / { + printf("%Y\n", walltimestamp); + printa("%s %s %s :: %s\n (%d,%d), us:\n%@d\n", @plots); + printa("%18s %44@d\n", @avgs); + printa("%18s %44@d\n", @avgbytes); + printa("%18s %44@d\n", @perio_t); + printf("%s\n",""); + trunc(@avgs); trunc(@avgbytes); trunc(@perio_t); + fire = 0; +} diff --git a/downstack.d b/downstack.d new file mode 100644 index 0000000..28737e7 --- /dev/null +++ b/downstack.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -FCs + +$1:entry +{ + self->trace = 1; +} + +$1:return +{ + printf("Returns 0x%llx", arg1); + self->trace = 0; +} + +::entry +/self->trace == 1/ +{ +} + +::return +/self->trace == 1/ +{ + printf("Returns 0x%llx", arg1); +} diff --git a/filemon_zfs.d b/filemon_zfs.d new file mode 100644 index 0000000..3ec3794 --- /dev/null +++ b/filemon_zfs.d @@ -0,0 +1,94 @@ +#!/usr/sbin/dtrace -s + +/* + * + * filemon_zfs.d - Monitors specific file access + * Written using DTrace. + * + * + * $Id: filemon_zfs.d 1 2010-03-12 14:16:26Z sergio $ + * + * USAGE: filemon_zfs.d + * + * eg, + * ./filemon_zfs.d /var/adm/messages # Monitor access to /var/adm/messages + * + * Must be root or with DTrace role privilege + * + * NOTES: This script uses dtrace so it should work on Solaris or OpenSolaris + * + * THANKS: The students of a DTrace course for the idea + * + * COPYRIGHT: Copyright (c) 2008 Sergio Rodriguez de Guzman Martinez + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * Author: Sergio Rodriguez de Guzman [Madrid, Spain] + * + * 12-03-2010 Sergio Rodriguez de Guzman Created this. + * + * + */ + +#pragma D option quiet + +BEGIN +{ + printf ("%20s%20s%8s%20s%10s%10s\n", "DATE", "CMD", "R/W/D", "PATH", "USER", "PID"); +} + +zfs_read:entry, +zfs_getpage:entry +{ + self->filepath = args[0]->v_path; +} + +zfs_write:entry, +zfs_putpage:entry +{ + self->filepath = args[0]->v_path; +} + +zfs_write:return, +zfs_putpage:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "W", stringof(self->filepath), uid, pid); + self->filepath = 0; +} + +zfs_read:return, +zfs_getpage:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "R", stringof(self->filepath), uid, pid); + self->filepath = 0; +} + +zfs_remove:entry +{ + self->filepath = strjoin( stringof(args[0]->v_path), "/" ); + self->filepath = strjoin( self->filepath, stringof(args[1]) ); +} + +zfs_remove:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "D", stringof(self->filepath), uid, pid); +} diff --git a/findddt.d b/findddt.d new file mode 100644 index 0000000..a729d6b --- /dev/null +++ b/findddt.d @@ -0,0 +1,31 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +BEGIN +{ +fpath = $1 +} + +syscall::read:entry,syscall::write:entry +/ fds[arg0].fi_pathname == fpath / +{ +@fs[execname, fds[arg0].fi_pathname] = count(); +self->ts = timestamp; + } + +::ddt*:entry, ::zio_ddt*:entry +/self->ts && probefunc != "ddt_prefetch"/ +{ +@st[probefunc, stack()] = count(); +printf("%s %Y\n", probefunc, walltimestamp); + +self->ts = 0; +} + +END +{ +printa("Command: %-20s Count: %-@8d\nPath: %s", @fs); +trunc(@st,5); +exit(0); +} diff --git a/hotfile.d b/hotfile.d new file mode 100644 index 0000000..df6e582 --- /dev/null +++ b/hotfile.d @@ -0,0 +1,98 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); + line = "-----------"; + x = 0; + hotcount = 10; +} + +nfsv3:::op-read-start, +nfsv3:::op-write-start + +{ + this->filepath = args[1]->noi_curpath; + start[args[1]->noi_xid] = timestamp; + active_nfs = 1; + x = 1; +} + +io:::start +/active_nfs != 0/ + +{ + this->bytes = args[0]->b_bcount; + /* @bytesize[args[2]->fi_pathname,pid, curpsinfo->pr_psargs] = quantize(this->bytes); */ + @totalb[args[0]->b_flags & B_READ ? "[READ]" : "[WRITE]", args[2]->fi_pathname] = sum(this->bytes); +} + +nfsv3:::op-read-done, +nfsv3:::op-write-done +/start[args[1]->noi_xid] != 0/ + +{ + this->elapsed = timestamp - start[args[1]->noi_xid]; + /* this->filepath = args[1]->noi_curpath; */ + @nfs_rwtime[this->filepath, probename == "op-read-done" ? "Read time(us)" : "Write time(us)"] = + quantize(this->elapsed / 1000); + /* @host[args[0]->ci_remote] = sum(this->elapsed); */ + @rwlatency[args[0]->ci_remote, this->filepath, + probename == "op-read-done" ? "Read time" : "Write time"] = sum(this->elapsed); + + start[args[1]->noi_xid] = 0; + active_nfs = 0; +} + +nfsv3:::op-read-done +{ + this->bytes = args[2]->res_u.ok.data.data_len; + @nfsopsbypath["[READ]", this->filepath] = count(); + /* Collect sum of bytes for each client IP address */ + @nfsreadbytes[args[0]->ci_remote, "[READ]"] = sum(this->bytes); + @busyfiles["[READ]", this->filepath] = sum(this->bytes); +} + + +nfsv3:::op-write-done +{ + this->bytes = args[2]->res_u.ok.count; + @nfsopsbypath["[WRITE]", this->filepath] = count(); + /* Collect sum of bytes for each client IP address */ + @nfswritebytes[args[0]->ci_remote, "[WRITE]"] = sum(this->bytes); + @busyfiles["[WRITE]", this->filepath] = sum(this->bytes); +} + +tick-1sec +/ x != 0 / +{ + printa("Client IP: %-16s NFS I/O: %-8s %@d bytes\n", @nfsreadbytes); + printa("Client IP: %-16s NFS I/O: %-8s %@d bytes\n", @nfswritebytes); + printa(" Physical I/O: %s %@d bytes\n", @totalb); + normalize(@rwlatency,1000000); trunc(@rwlatency,5); trunc(@nfs_rwtime,5); + printa("Client IP: %-16s Path: %s\n%s (ms) %@d\n", @rwlatency); printa(@nfs_rwtime); + trunc(@nfs_rwtime); trunc(@rwlatency); + trunc(@nfsreadbytes); trunc(@nfswritebytes); trunc(@totalb); + x = 0; +} + +dtrace:::END +{ + /* trunc(@bytesize,5); + trunc(@totalb,5); + printf("\n%-60s %-8s %s\n", "Pathname", "PID", "CMD"); + printa("%-60s %-8d %S\n%@d\n", @bytesize); + printa("----------\nPath: %s\nBytes Total: %@d\n----------\n",@totalb); */ + /* Summarize total operations and number of bytes read and written + for each file, organized by path, and trimmed to number of files + equal to value of hotcount var. */ + printf("%s\n\n","---------------------- NFSv3 Read/Write top files Summary ----------------------"); + trunc(@busyfiles, hotcount); + trunc(@nfsopsbypath, hotcount); + + printa("%s Path: %s %@d bytes, Operations: %@d\n", @busyfiles,@nfsopsbypath); + /* normalize(@file, 1000); + printa(@file); */ +} diff --git a/io_slower_than_50ms.d b/io_slower_than_50ms.d new file mode 100644 index 0000000..8086413 --- /dev/null +++ b/io_slower_than_50ms.d @@ -0,0 +1,91 @@ +#!/usr/sbin/dtrace -s +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 RackTop Systems, All rights reserved. + * + * Script should help to pinpoint disks which are seemingly slower than + * most, i.e. we can collect top 5 or in this case top 10 slowest to respond + * disks on a system. + * io_slower_than_50ms.d: show Physical I/O events taking longer than 50 ms. + * + * USAGE: io_slower_than_50ms.d + * + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +#pragma D option quiet + +BEGIN { + x = 0; + y = 0; + watermark = 50; /* We want to track all IO slower than 50ms. */ + start_d = timestamp; + to_millisec = 1000000; + div = "--------------------------------------------------------------------------------"; +} + +io::bdev_strategy:start +{ + st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] = timestamp; + blk_addr = args[0]->b_addr; +} + +io:::done +/ st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] / +{ + x = (timestamp - st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr]) / to_millisec; + @[args[0]->b_bcount/0x1000] = lquantize(x,20,80,10); +} + +io:::done +/* We should arrive here eveery time we hip IO that passes the watermark +and we increment counter as well as add event to aggregation. */ +/ x > watermark / +{ + y += 1; + @slower_than_wm[args[1]->dev_pathname] = count(); +} + +io:::done +/ st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] / +{ + st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] = 0; + blk_addr = 0; + x = 0; +} + +END +{ + stop_d = (timestamp - start_d) / to_millisec; + trunc(@slower_than_wm,10); + printa("\t time(ms)\t\t\t\t blocksize (KB): %d %24@d\n",@); + printf("%s\n",div); + printf("Top 10 slow disks by count of IO events longer than %d (ms)\n",watermark); + printf("%s\n",div); + printa("Number of slow IO events: %@d device: %s\n",@slower_than_wm); + printf("%s\n",div); + printf("Counted a total of %d events slower than %d (ms) in last %d seconds\n",y,watermark,stop_d/1000); +} diff --git a/iofile.d b/iofile.d new file mode 100755 index 0000000..255057a --- /dev/null +++ b/iofile.d @@ -0,0 +1,79 @@ +#!/usr/sbin/dtrace -s +/* + * iofile.d - I/O wait time by filename and process. + * Written using DTrace (Solaris 10 3/05). + * + * This prints the total I/O wait times for each filename by process. + * This can help determine why an application is performing poorly by + * identifying which file they are waiting on, and the total times. + * Both disk and NFS I/O are measured. + * + * $Id: iofile.d 3 2007-08-01 10:50:08Z brendan $ + * + * USAGE: iofile.d # wait, then hit Ctrl-C to end + * + * FIELDS: + * PID Process ID + * CMD Process name + * TIME Total wait time for disk events, us + * FILE File pathname + * + * BASED ON: /usr/demo/dtrace/iocpu.d + * + * SEE ALSO: iosnoop, iotop + * + * PORTIONS: Copyright (c) 2005, 2006 Brendan Gregg. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * 24-Jul-2005 Brendan Gregg Created this. + * 20-Apr-2006 " " Last update. + */ + +#pragma D option quiet + +/* print header */ +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +/* save time at start */ +io:::wait-start +{ + self->start = timestamp; +} + +/* process event */ +io:::wait-done +/self->start/ +{ + /* + * wait-done is used as we are measing wait times. It also + * is triggered when the correct thread is on the CPU, obviating + * the need to link process details to the start event. + */ + this->elapsed = timestamp - self->start; + @files[pid, execname, args[2]->fi_pathname] = sum(this->elapsed); + self->start = 0; +} + +/* print report */ +dtrace:::END +{ + normalize(@files, 1000); + printf("%6s %-12s %8s %s\n", "PID", "CMD", "TIME", "FILE"); + printa("%6d %-12.12s %@8d %s\n", @files); +} diff --git a/iofileb.d b/iofileb.d new file mode 100755 index 0000000..e7572f3 --- /dev/null +++ b/iofileb.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -s +/* + * iofileb.d - I/O bytes by filename and process. + * Written using DTrace (Solaris 10 3/05). + * + * This prints a summary of requested disk activity by pathname, + * providing totals of the I/O events in bytes. It is a companion to the + * iofile.d script - which prints in terms of I/O wait time, not bytes. + * I/O wait time is a better metric for understanding performance issues. + * Both disk and NFS I/O are measured. + * + * $Id: iofileb.d 3 2007-08-01 10:50:08Z brendan $ + * + * USAGE: iofileb.d # wait several seconds, then hit Ctrl-C + * + * FIELDS: + * PID process ID + * CMD command name + * KB Kilobytes of disk I/O + * FILE Full pathname of the file + * + * COPYRIGHT: Copyright (c) 2006 Brendan Gregg. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * 20-Feb-2006 Brendan Gregg Created this. + * 20-Feb-2006 " " Last update. + */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + @files[pid, execname, args[2]->fi_pathname] = sum(args[0]->b_bcount); +} + +dtrace:::END +{ + normalize(@files, 1024); + printf("%6s %-12s %6s %s\n", "PID", "CMD", "KB", "FILE"); + printa("%6d %-12.12s %@6d %s\n", @files); +} diff --git a/ioinfopervdev.d b/ioinfopervdev.d new file mode 100644 index 0000000..86168d4 --- /dev/null +++ b/ioinfopervdev.d @@ -0,0 +1,10 @@ +zio_vdev_io_start:entry +{ + printf("vdev_bytes_statistics: NULL: %d\tREAD: %d\tWRITE: %d\tFREE: %d\tCLAIM: %d\tIOCTL: %d\n", + args[0]->io_vd->vdev_stat.vs_bytes[0], /* NULL */ + args[0]->io_vd->vdev_stat.vs_bytes[1], /* READ */ + args[0]->io_vd->vdev_stat.vs_bytes[2], /* WRITE */ + args[0]->io_vd->vdev_stat.vs_bytes[3], /* FREE */ + args[0]->io_vd->vdev_stat.vs_bytes[4], /* CLAIM */ + args[0]->io_vd->vdev_stat.vs_bytes[5]); /* IOCTL */ +} diff --git a/iscsi-inactivity-monitor.d b/iscsi-inactivity-monitor.d new file mode 100644 index 0000000..1177846 --- /dev/null +++ b/iscsi-inactivity-monitor.d @@ -0,0 +1,84 @@ +#!/usr/sbin/dtrace -Cs + +/* + * iscsi-inactivity-monitor.d + * + * This dtrace script looks for the lack of iscsi target traffic and + * will log the non-event by timestamp. + * + * Sample output: + ** iSCSI target inactivity monitor + ** Inactivity threshold 11 seconds + ** Start time 2010 Dec 22 16:41:54 + ** No incoming iSCSI traffic since 2010 Dec 22 16:41:54, current time 2010 Dec 22 16:42:06 + ** No outgoing iSCSI traffic since 2010 Dec 22 16:41:54, current time 2010 Dec 22 16:42:06 + ** iSCSI inactivity monitor, 856 total transfers at 2010 Dec 22 17:46:37 + * + * Copyright 2010, Nexenta Systems, Inc. All rights reserved. + */ + +#pragma D option quiet + +inline int THRESHOLD = 11000000000; /* nanoseconds */ + +BEGIN { + n = 0; + start_active = 0; /* current activity flag */ + start_ts = timestamp + THRESHOLD; + start_ws = walltimestamp; + done_active = 0; + done_ts = start_ts; + done_ws = start_ws; + printf("iSCSI target inactivity monitor\n"); + printf("Inactivity threshold %d seconds\n", THRESHOLD/1000000000); + printf("Start time %Y\n", start_ws); +} + +iscsi:::xfer-start +{ + n++; + start_ts = timestamp + THRESHOLD; + start_ws = walltimestamp; +} +iscsi:::xfer-start +/!start_active/ +{ + start_active = 1; + printf("Incoming traffic at %Y\n", walltimestamp); +} + +profile:::tick-1sec +/timestamp > start_ts && start_active/ +{ + printf("No incoming iSCSI traffic since %Y, current time %Y\n", start_ws, walltimestamp); + start_active = 0; +} + +iscsi:::xfer-done +{ + done_ts = timestamp + THRESHOLD; + done_ws = walltimestamp; +} +iscsi:::xfer-done +/!done_active/ +{ + done_active = 1; + printf("Outgoing traffic at %Y\n", walltimestamp); +} + +profile:::tick-1sec +/timestamp > done_ts && done_active/ +{ + printf("No outgoing iSCSI traffic since %Y, current time %Y\n", done_ws, walltimestamp); + done_active = 0; +} + +profile:::tick-1hour +{ + printf("iSCSI target inactivity monitor status at %Y\n", walltimestamp); + printf("\tTotal transfers = %d\n", n); + printf("\tIncoming flow %s\n", start_active ? "active" : "inactive"); + printf("\tLast incoming transfer at %Y\n", start_ws); + printf("\tOutgoing flow %s\n", done_active ? "active" : "inactive"); + printf("\tLast outgoing transfer at %Y\n", done_ws); +} \ No newline at end of file diff --git a/iscsiio.d b/iscsiio.d new file mode 100644 index 0000000..332e2b0 --- /dev/null +++ b/iscsiio.d @@ -0,0 +1,61 @@ +#!/usr/sbin/dtrace -s +/* + * iscsiio.d - Report iSCSI I/O. Solaris Nevada, DTrace. + * + * This traces requested iSCSI data I/O events when run on an iSCSI server. + * The output reports iSCSI read and write I/O while this script was running. + * + * USAGE: iscsiio.d # Hit Ctrl-C to end + * + * FIELDS: + * REMOTE IP IP address of the client + * EVENT Data I/O event (read/write) + * COUNT Number of I/O events + * Kbytes Total data Kbytes transferred + * KB/sec Average data Kbytes/sec transferred + */ + +#pragma ident "@(#)iscsiio.d 1.2 07/03/27 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); + start = timestamp; +} + +iscsi*:::data-send +{ + @num[args[0]->ci_remote, "read"] = count(); + @bytes[args[0]->ci_remote, "read"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "read"] = sum(args[1]->ii_datalen); +} + +iscsi*:::data-receive +{ + @num[args[0]->ci_remote, "write"] = count(); + @bytes[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x0a || args[2]->ic_cdb[0] == 0x2a/ +{ + /* + * scsi-command writes also move data. Their codes are in + * /usr/include/sys/scsi/generic/commands.h . + */ + @num[args[0]->ci_remote, "write"] = count(); + @bytes[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); +} + +dtrace:::END +{ + normalize(@rate, (timestamp - start) * 1024 / 1000000000); + printf(" %-26s %8s %8s %10s %10s\n", "REMOTE IP", "EVENT", "COUNT", + "Kbytes", "KB/sec"); + normalize(@bytes, 1024); + printa(" %-26s %8s %@8d %@10d %@10d\n", @num, @bytes, @rate); +} diff --git a/iscsirwlat.d b/iscsirwlat.d new file mode 100644 index 0000000..5c3f8a3 --- /dev/null +++ b/iscsirwlat.d @@ -0,0 +1,69 @@ +#!/usr/sbin/dtrace -s +/* + * iscsirwlat.d - Report iSCSI Read/Write Latency. Solaris Nevada, DTrace. + * + * This traces iSCSI data I/O events when run on an iSCSI server, and + * produces a report of read/write latency in microseconds. + * + * USAGE: iscsirwlat.d # Hit Ctrl-C to end + * + * FIELDS: + * EVENT Data I/O event (data-send/data-receive) + * REMOTE IP IP address of the client + * COUNT Number of I/O events + * Kbytes Total data Kbytes transferred + * + * NOTE: If you try to sum the read times, they will sometimes add to + * a time longer than the sample time - the reason is that reads can + * occur in parallel, and so suming them together will overcount. + */ + +#pragma ident "@(#)iscsirwlat.d 1.3 07/03/28 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x08 || args[2]->ic_cdb[0] == 0x28/ +{ + /* + * self-> variables can't be used, as one thread receives the + * scsi command while another receives the reads. + */ + start_read[args[1]->ii_itt, args[1]->ii_initiator] = timestamp; +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x0a || args[2]->ic_cdb[0] == 0x2a/ +{ + start_write[args[1]->ii_itt, args[1]->ii_initiator] = timestamp; +} + +iscsi*:::data-send +/start_read[args[1]->ii_itt, args[1]->ii_initiator] && + (args[1]->ii_flags & ISCSI_FLAG_FINAL)/ +{ + @read[args[0]->ci_remote] = quantize(timestamp - + start_read[args[1]->ii_itt, args[1]->ii_initiator]); + start_read[args[1]->ii_ttt, args[1]->ii_initiator] = 0; +} + +iscsi*:::scsi-response +/start_write[args[1]->ii_itt, args[1]->ii_initiator]/ +{ + @write[args[0]->ci_remote] = quantize(timestamp - + start_write[args[1]->ii_itt, args[1]->ii_initiator]); + start_write[args[1]->ii_itt, args[1]->ii_initiator] = 0; +} + +dtrace:::END +{ + printf("Read Latency (ns),\n"); + printa(@read); + printf("Write Latency (ns),\n"); + printa(@write); +} diff --git a/iscsisnoop.d b/iscsisnoop.d new file mode 100644 index 0000000..7d8412f --- /dev/null +++ b/iscsisnoop.d @@ -0,0 +1,71 @@ +#!/usr/sbin/dtrace -s +/* + * iscsisnoop.d - Snoop iSCSI events. Solaris Nevada, DTrace. + * + * This snoops iSCSI events when run on an iSCSI server. + * + * USAGE: iscsisnoop.d # Hit Ctrl-C to end + * + * FIELDS: + * CPU CPU event occured on + * REMOTE IP IP address of the client + * EVENT Data I/O event (data-send/data-receive) + * BYTES Data bytes + * ITT Initiator task tag + * SCSIOP SCSI opcode as a description, as hex, or '-' + * + * NOTE: On multi-CPU servers output may not be in correct time order + * (shuffled). A change in the CPU column is a hint that this happened. + * If this is a problem, print an extra timestamp field and post sort. + */ + +#pragma ident "@(#)iscsisnoop.d 1.2 07/03/27 SMI" + +#pragma D option quiet +#pragma D option switchrate=10 + +dtrace:::BEGIN +{ + printf("%3s %-26s %-14s %6s %10s %6s\n", "CPU", "REMOTE IP", + "EVENT", "BYTES", "ITT", "SCSIOP"); + + /* + * SCSI opcode to string translation hash. This is from + * /usr/include/sys/scsi/generic/commands.h. If you would + * rather all hex, comment this out. + */ + scsiop[0x08] = "read"; + scsiop[0x0a] = "write"; + scsiop[0x0b] = "seek"; + scsiop[0x28] = "read(10)"; + scsiop[0x2a] = "write(10)"; + scsiop[0x2b] = "seek(10)"; +} + +iscsi*:::data-*, +iscsi*:::login-*, +iscsi*:::logout-*, +iscsi*:::nop-*, +iscsi*:::task-*, +iscsi*:::async-*, +iscsi*:::scsi-response +{ + printf("%3d %-26s %-14s %6d %10d -\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt); +} + +iscsi*:::scsi-command +/scsiop[args[2]->ic_cdb[0]] != NULL/ +{ + printf("%3d %-26s %-14s %6d %10d %s\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt, + scsiop[args[2]->ic_cdb[0]]); +} + +iscsi*:::scsi-command +/scsiop[args[2]->ic_cdb[0]] == NULL/ +{ + printf("%3d %-26s %-14s %6d %10d 0x%x\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt, + args[2]->ic_cdb[0]); +} diff --git a/iscsiterr2.d b/iscsiterr2.d new file mode 100644 index 0000000..d9f892c --- /dev/null +++ b/iscsiterr2.d @@ -0,0 +1,231 @@ +#!/usr/sbin/dtrace -Cs + +#pragma D option quiet +#pragma D option switchrate=10hz + +typedef enum idm_status { + IDM_STATUS_SUCCESS = 0, + IDM_STATUS_FAIL, + IDM_STATUS_NORESOURCES, + IDM_STATUS_REJECT, + IDM_STATUS_IO, + IDM_STATUS_ABORTED, + IDM_STATUS_SUSPENDED, + IDM_STATUS_HEADER_DIGEST, + IDM_STATUS_DATA_DIGEST, + IDM_STATUS_PROTOCOL_ERROR, + IDM_STATUS_LOGIN_FAIL +} idm_status_t; + +dtrace:::BEGIN +{ + /* + * The following was generated from the SCSI_CMDS_KEY_STRINGS + * definitions in /usr/include/sys/scsi/generic/commands.h using sed. + * Additional codes gathered from http://en.wikipedia.org/wiki/SCSI_command + */ + scsi_cmd[0x00] = "test_unit_ready"; + scsi_cmd[0x01] = "rezero/rewind"; + scsi_cmd[0x03] = "request_sense"; + scsi_cmd[0x04] = "format"; + scsi_cmd[0x05] = "read_block_limits"; + scsi_cmd[0x07] = "reassign"; + scsi_cmd[0x08] = "read"; + scsi_cmd[0x0a] = "write"; + scsi_cmd[0x0b] = "seek"; + scsi_cmd[0x0f] = "read_reverse"; + scsi_cmd[0x10] = "write_file_mark"; + scsi_cmd[0x11] = "space"; + scsi_cmd[0x12] = "inquiry"; + scsi_cmd[0x13] = "verify"; + scsi_cmd[0x14] = "recover_buffer_data"; + scsi_cmd[0x15] = "mode_select"; + scsi_cmd[0x16] = "reserve"; + scsi_cmd[0x17] = "release"; + scsi_cmd[0x18] = "copy"; + scsi_cmd[0x19] = "erase_tape"; + scsi_cmd[0x1a] = "mode_sense"; + scsi_cmd[0x1b] = "load/start/stop"; + scsi_cmd[0x1c] = "get_diagnostic_results"; + scsi_cmd[0x1d] = "send_diagnostic_command"; + scsi_cmd[0x1e] = "door_lock"; + scsi_cmd[0x23] = "read_format_capacity"; + scsi_cmd[0x24] = "set_window"; + scsi_cmd[0x25] = "read_capacity"; + scsi_cmd[0x28] = "read(10)"; + scsi_cmd[0x29] = "read_generation"; + scsi_cmd[0x2a] = "write(10)"; + scsi_cmd[0x2b] = "seek(10)"; + scsi_cmd[0x2c] = "erase(10)"; + scsi_cmd[0x2d] = "read_updated_block"; + scsi_cmd[0x2e] = "write_verify"; + scsi_cmd[0x2f] = "verify(10)"; + scsi_cmd[0x30] = "search_data_high"; + scsi_cmd[0x31] = "search_data_equal"; + scsi_cmd[0x32] = "search_data_low"; + scsi_cmd[0x33] = "set_limits"; + scsi_cmd[0x34] = "read_position"; + scsi_cmd[0x35] = "synchronize_cache"; + scsi_cmd[0x36] = "lock_unlock_cache"; + scsi_cmd[0x37] = "read_defect_data"; + scsi_cmd[0x38] = "medium_scan"; + scsi_cmd[0x39] = "compare"; + scsi_cmd[0x3a] = "copy_verify"; + scsi_cmd[0x3b] = "write_buffer"; + scsi_cmd[0x3c] = "read_buffer"; + scsi_cmd[0x3d] = "update_block"; + scsi_cmd[0x3e] = "read_long"; + scsi_cmd[0x3f] = "write_long"; + scsi_cmd[0x40] = "change_definition"; + scsi_cmd[0x41] = "write_same(10)"; + scsi_cmd[0x44] = "report_densities/read_header"; + scsi_cmd[0x45] = "play_audio(10)"; + scsi_cmd[0x46] = "get_configuration"; + scsi_cmd[0x47] = "play_audio_msf"; + scsi_cmd[0x4a] = "get_event_status_notification"; + scsi_cmd[0x4b] = "pause_resume"; + scsi_cmd[0x4c] = "log_select"; + scsi_cmd[0x4d] = "log_sense"; + scsi_cmd[0x50] = "xdwrite(10)"; + scsi_cmd[0x51] = "xpwrite(10)"; + scsi_cmd[0x52] = "xdread(10)"; + scsi_cmd[0x53] = "xdwriteread(10)"; + scsi_cmd[0x54] = "send_opc_information"; + scsi_cmd[0x55] = "mode_select(10)"; + scsi_cmd[0x56] = "reserve(10)"; + scsi_cmd[0x57] = "release(10)"; + scsi_cmd[0x58] = "repair track"; + scsi_cmd[0x5a] = "mode_sense(10)"; + scsi_cmd[0x5b] = "close_track/session"; + scsi_cmd[0x5c] = "read_buffer_capacity"; + scsi_cmd[0x5d] = "send_cue_sheet"; + scsi_cmd[0x5e] = "persistent_reserve_in"; + scsi_cmd[0x5f] = "persistent_reserve_out"; + scsi_cmd[0x7e] = "extended_cdb"; + scsi_cmd[0x7f] = "variable_length_cdb"; + scsi_cmd[0x80] = "write_file_mark(16)"; + scsi_cmd[0x81] = "read_reverse(16)"; + scsi_cmd[0x82] = "regenerate(16)"; + scsi_cmd[0x83] = "extended_copy"; + scsi_cmd[0x84] = "receive_copy_results"; + scsi_cmd[0x85] = "ata_command_passthrough(16)"; + scsi_cmd[0x86] = "access_control_in"; + scsi_cmd[0x87] = "access_control_out"; + scsi_cmd[0x88] = "read(16)"; + scsi_cmd[0x8a] = "write(16)"; + scsi_cmd[0x8b] = "orwrite"; + scsi_cmd[0x8c] = "read_attribute"; + scsi_cmd[0x8d] = "write_attribute"; + scsi_cmd[0x8e] = "write_verify(16)"; + scsi_cmd[0x8f] = "verify(16)"; + scsi_cmd[0x90] = "prefetch(16)"; + scsi_cmd[0x91] = "synchronize_cache(16)"; + scsi_cmd[0x92] = "space(16)/lock_unlock_cache(16)"; + scsi_cmd[0x93] = "write_same(16)"; + scsi_cmd[0x9e] = "service_action_in(16)"; + scsi_cmd[0x9f] = "service_action_out(16)"; + scsi_cmd[0xa0] = "report_luns"; + scsi_cmd[0xa1] = "ata_command_passthrough(12)"; + scsi_cmd[0xa2] = "security_protocol_in"; + scsi_cmd[0xa3] = "report_supported_opcodes"; + scsi_cmd[0xa4] = "maintenance_out"; + scsi_cmd[0xa5] = "move_medium"; + scsi_cmd[0xa6] = "exchange_medium"; + scsi_cmd[0xa7] = "move_medium_attached"; + scsi_cmd[0xa8] = "read(12)"; + scsi_cmd[0xa9] = "service_action_out(12)"; + scsi_cmd[0xaa] = "write(12)"; + scsi_cmd[0xab] = "service_action_in(12)"; + scsi_cmd[0xac] = "get_performance/erase(12)"; + scsi_cmd[0xad] = "read_dvd_structure"; + scsi_cmd[0xae] = "write_verify(12)"; + scsi_cmd[0xaf] = "verify(12)"; + scsi_cmd[0xb0] = "search_data_high(12)"; + scsi_cmd[0xb1] = "search_data_equal(12)"; + scsi_cmd[0xb2] = "search_data_low(12)"; + scsi_cmd[0xb3] = "set_limits(12)"; + scsi_cmd[0xb4] = "read_element_status_attached"; + scsi_cmd[0xb5] = "security_protocol_out"; + scsi_cmd[0xb6] = "send_volume_tag"; + scsi_cmd[0xb7] = "read_defect_data(12)"; + scsi_cmd[0xb8] = "read_element_status"; + scsi_cmd[0xb9] = "read_cd_msf"; + scsi_cmd[0xba] = "redundancy_group_in"; + scsi_cmd[0xbb] = "redundancy_group_out"; + scsi_cmd[0xbc] = "spare_in"; + scsi_cmd[0xbd] = "spare_out"; + scsi_cmd[0xbe] = "volume_set_in"; + scsi_cmd[0xbf] = "volume_set_out"; + + /* Key codes */ + key_code[0x0] = "no_sense"; + key_code[0x1] = "soft_error"; + key_code[0x2] = "not_ready"; + key_code[0x3] = "medium_error"; + key_code[0x4] = "hardware_error"; + key_code[0x5] = "illegal_request"; + key_code[0x6] = "unit_attention"; + key_code[0x7] = "data_protect"; + key_code[0x8] = "blank_check"; + key_code[0x9] = "vendor_specific"; + key_code[0xa] = "copy_aborted"; + key_code[0xb] = "aborted_command"; + /* key_code[0xc] is obsolete */ + key_code[0xd] = "volume_overflow"; + key_code[0xe] = "miscompare"; + /* key_code[0xf] is reserved */ + + /* kcq codes - of the form key/asc/ascq */ + /* http://en.wikipedia.org/wiki/Key_Code_Qualifier */ + kcq_code[0x0,0x0,0x0] = "no error"; + kcq_code[0x5,0x24,0x0] = "illegal field in CDB"; + kcq_code[0x6,0x29,0x0] = "POR or device reset occurred"; + kcq_code[0x6,0x29,0x1] = "POR occurred"; + + /* IDM status codes */ + status[IDM_STATUS_FAIL] = "FAIL"; + status[IDM_STATUS_NORESOURCES] = "NORESOURCES"; + status[IDM_STATUS_REJECT] = "REJECT"; + status[IDM_STATUS_IO] = "IO"; + status[IDM_STATUS_ABORTED] = "ABORTED"; + status[IDM_STATUS_SUSPENDED] = "SUSPENDED"; + status[IDM_STATUS_HEADER_DIGEST] = "HEADER_DIGEST"; + status[IDM_STATUS_DATA_DIGEST] = "DATA_DIGEST"; + status[IDM_STATUS_PROTOCOL_ERROR] = "PROTOCOL_ERROR"; + status[IDM_STATUS_LOGIN_FAIL] = "LOGIN_FAIL"; + + printf("%-20s %-20s %s\n", "TIME", "CLIENT", "ERROR"); +} + +fbt::idm_pdu_complete:entry +/arg1 != IDM_STATUS_SUCCESS/ +{ + this->ic = args[0]->isp_ic; + this->remote = (this->ic->ic_raddr.ss_family == AF_INET) ? + inet_ntoa((ipaddr_t *)&((struct sockaddr_in *)& + this->ic->ic_raddr)->sin_addr) : + inet_ntoa6(&((struct sockaddr_in6 *)& + this->ic->ic_raddr)->sin6_addr); + + this->err = status[arg1] != NULL ? status[arg1] : lltostr(arg1); + printf("%-20Y %-20s %s\n", walltimestamp, this->remote, this->err); +} + +fbt::stmf_send_scsi_status:entry +/ args[0]->task_mgmt_function == 0 && args[0]->task_sense_data != 0 / +{ + /* TODO: get the client address */ + printf("%-20Y %-20s ", walltimestamp, "-"); + this->code = args[0]->task_cdb[0]; + this->cmd = scsi_cmd[this->code] != NULL ? + scsi_cmd[this->code] : lltostr(this->code); + printf("CMD=%s, ", this->cmd); + this->key_id = (unsigned)args[0]->task_sense_data[2]; + this->asc_id = (unsigned)args[0]->task_sense_data[12]; + this->ascq_id = (unsigned)args[0]->task_sense_data[13]; + this->key = key_code[this->key_id] != NULL ? key_code[this->key_id] : "TBD"; + this->kcq = kcq_code[this->key_id,this->asc_id,this->ascq_id] != NULL ? + kcq_code[this->key_id,this->asc_id,this->ascq_id] : "TBD"; + printf("Check condition, Key/ASC/ASCQ = %x/%x/%x (%s/%s)\n", + this->key_id, this->asc_id, this->ascq_id,this->key, this->kcq); +} diff --git a/iscsiwho.d b/iscsiwho.d new file mode 100644 index 0000000..416fd68 --- /dev/null +++ b/iscsiwho.d @@ -0,0 +1,33 @@ +#!/usr/sbin/dtrace -s +/* + * iscsiwho.d - Report iSCSI client events. Solaris Nevada, DTrace. + * + * This traces requested iSCSI events when run on an iSCSI server. + * + * USAGE: iscsiwho.d # Hit Ctrl-C to end + * + * FIELDS: + * REMOTE IP IP address of the client + * iSCSI EVENT iSCSI event type. + * COUNT Number of events traced + */ + +#pragma ident "@(#)iscsiwho.d 1.3 07/03/27 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +iscsi*::: +{ + @events[args[0]->ci_remote, probename] = count(); +} + +dtrace:::END +{ + printf(" %-26s %14s %8s\n", "REMOTE IP", "iSCSI EVENT", "COUNT"); + printa(" %-26s %14s %@8d\n", @events); +} diff --git a/learn-d.d b/learn-d.d new file mode 100644 index 0000000..c304653 --- /dev/null +++ b/learn-d.d @@ -0,0 +1,6 @@ +BEGIN +{ + trace("hello world."); + exit(0); + +} \ No newline at end of file diff --git a/loadavg.d b/loadavg.d new file mode 100644 index 0000000..dd22d19 --- /dev/null +++ b/loadavg.d @@ -0,0 +1,17 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet + +profile:::tick-1sec + { + this->load1a = `hp_avenrun[0] / 65536; + this->load5a = `hp_avenrun[1] / 65536; + this->load15a = `hp_avenrun[2] / 65536; + this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536; + this->load5b = ((`hp_avenrun[1] % 65536) * 100) / 65536; + this->load15b = ((`hp_avenrun[2] % 65536) * 100) / 65536; + + printf("%.05d %d.%02d %d.%02d %d.%02d\n", + walltimestamp/1000000000, this->load1a, this->load1b, this->load5a, + this->load5b, this->load15a, this->load15b); + + } \ No newline at end of file diff --git a/metaslab_free.d b/metaslab_free.d new file mode 100644 index 0000000..422b40c --- /dev/null +++ b/metaslab_free.d @@ -0,0 +1,53 @@ +#!/usr/sbin/dtrace -s +/* + * metaslab_free.d Show ZFS metaslab percent free on allocations. + * + * ZFS switches to a slower allocation algorithm when the free size in a + * metaslab (usually 16 Gbytes) is less than a percentage. The slower + * algorithm is best-fit instead of fast-fit. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing ZFS metaslab alloc. metaslab_df_free_pct = %d %%\n", + `metaslab_df_free_pct); +} + +fbt::metaslab_df_alloc:entry +{ + this->pct = args[0]->sm_space * 100 / args[0]->sm_size; + @[this->pct] = count(); +} + +profile:::tick-1s +{ + printf("\n%Y free %%pct by allocations:", walltimestamp); + printa(@); + trunc(@); +} \ No newline at end of file diff --git a/nfs-l.d b/nfs-l.d new file mode 100644 index 0000000..7013236 --- /dev/null +++ b/nfs-l.d @@ -0,0 +1,252 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=1000hz +#pragma D option dynvarsize=512m + + +inline int TICKS=$1; +inline string ADDR=$$2; + +dtrace:::BEGIN +{ + TIMER = ( TICKS != NULL ) ? TICKS : 1 ; + ticks = TIMER; + TITLE=10; + title = 0; + walltime= walltimestamp/1000000000; + type="R"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + type="W"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + @nfs_rmx=max(0); + @nfs_wmx=max(0); + @zfs_rmx=max(0); + @zfs_wmx=max(0); + @io_rmx=max(0); + @io_wmx=max(0); +} + +/* ===================== beg TCP ================================= */ +tcp:::send +/ ( ADDR == NULL || args[3]->tcps_raddr == ADDR ) && args[2]->ip_plength - args[4]->tcp_offset > 0 / +{ + this->type="R"; + tcp_ct[this->type]++; + tcp_sz[this->type]=tcp_sz[this->type]+ args[2]->ip_plength - args[4]->tcp_offset; + @tcprsz=quantize(args[2]->ip_plength - args[4]->tcp_offset); +} +tcp:::receive +/ ( ADDR == NULL || args[3]->tcps_raddr == ADDR ) && args[2]->ip_plength - args[4]->tcp_offset > 0 / +{ + this->type="W"; + tcp_ct[this->type]++; + tcp_sz[this->type]=tcp_sz[this->type]+ args[2]->ip_plength - args[4]->tcp_offset; + @tcpwsz=quantize(args[2]->ip_plength - args[4]->tcp_offset); +} +/* ===================== end TCP ================================= */ + +/* ===================== beg NFS ================================= */ +nfsv3:::op-read-start, nfsv3:::op-write-start ,nfsv4:::op-read-start +{ + tm[args[1]->noi_xid] = timestamp; + sz[args[1]->noi_xid] = args[2]->count ; +} +nfsv4:::op-write-start +{ + tm[args[1]->noi_xid] = timestamp; + sz[args[1]->noi_xid] = args[2]->data_len ; +} +nfsv3:::op-read-done, nfsv3:::op-write-done, nfsv4:::op-read-done, nfsv4:::op-write-done +/tm[args[1]->noi_xid]/ +{ + this->delta= timestamp - tm[args[1]->noi_xid]; + this->type = probename == "op-write-done" ? "W" : "R"; + nfs_tm[this->type]=nfs_tm[this->type]+this->delta; + @nfs_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @nfs_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + nfs_ct[this->type]++; + nfs_sz[this->type]=nfs_sz[this->type]+ sz[args[1]->noi_xid]; + tm[args[1]->noi_xid] = 0; + sz[args[1]->noi_xid] = 0; +} +/* --------------------- end NFS --------------------------------- */ + +/* ===================== beg ZFS ================================= */ + +zfs_read:entry,zfs_write:entry +{ + self->ts = timestamp; + self->filepath = args[0]->v_path; + self->size = ((uio_t *)arg1)->uio_resid; +} + + +zfs_read:return,zfs_write:return +/self->ts / +{ + this->type = probefunc == "zfs_write" ? "W" : "R"; + this->delta=timestamp - self->ts ; + zfs_tm[this->type]= zfs_tm[this->type] + this->delta; + zfs_ct[this->type]++; + zfs_sz[this->type]=zfs_sz[this->type]+self->size; + @zfs_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @zfs_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + self->ts=0; + self->filepath=0; + self->size=0; +} +/* --------------------- end ZFS --------------------------------- */ + + + +/* ===================== beg IO ================================= */ +io:::start +/ arg0 != NULL && args[0]->b_addr != 0 / +{ + tm_io[(struct buf *)arg0] = timestamp; + sz_io[(struct buf *)arg0] = args[0]->b_bcount; + +} +io:::done +/tm_io[(struct buf *)arg0]/ +{ + this->type = args[0]->b_flags & B_READ ? "R" : "W" ; + this->delta = ( timestamp - tm_io[(struct buf *)arg0]); + io_tm[this->type]=io_tm[this->type]+this->delta; + @io_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @io_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + io_ct[this->type]++; + io_sz[this->type]=io_sz[this->type]+ sz_io[(struct buf *)arg0] ; + sz_io[(struct buf *)arg0] = 0; + tm_io[(struct buf *)arg0] = 0; +} +/* --------------------- end IO --------------------------------- */ + + + +profile:::tick-1sec / ticks > 0 / { ticks--; } + +profile:::tick-1sec +/ ticks == 0 / +{ + type="R"; + printf("-------------avg_ms--------MB/s--IO/sz/kb--max_ms--- summary\n"); + printf(" %s | IO : %4d.%03d %6d.%03d %4d ",type, + (io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000000 , + ((io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000) % 1000, + (io_sz[type]/TIMER)/1000000, + ((io_sz[type]/TIMER)/1000)%1000, + (io_ct[type] == 0 ? 0 : io_sz[type]/io_ct[type])/1000 ); + printa(" %@8u summary\n", @io_rmx); + printf(" %s | ZFS : %4d.%03d %6d.%03d %4d ",type, + (zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000000, + ((zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000) % 1000, + (zfs_sz[type]/TIMER)/1000000, + ((zfs_sz[type]/TIMER)/1000)%1000, + (zfs_ct[type] == 0 ? 0 : zfs_sz[type]/zfs_ct[type])/1000); + printa(" %@8u summary\n", @zfs_rmx); + printf(" %s | NFS : %4d.%03d %6d.%03d %4d ",type, + (nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000000, + ((nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000) % 1000, + (nfs_sz[type]/TIMER)/1000000, + ((nfs_sz[type]/TIMER)/1000)%1000, + (nfs_ct[type] == 0 ? 0 : nfs_sz[type]/nfs_ct[type])/1000); + printa(" %@8u summary ", @nfs_rmx ); + printf(" %8d \n", nfs_ct[type]); + printf(" %s | TCP : %6d.%03d ",type, + (tcp_sz[type]/TIMER)/1000000, + ((tcp_sz[type]/TIMER)/1000)%1000 + ); + printf(" summary %d\n",tcp_ct[type]/TIMER); + type="W"; + printf(" ------ summary\n"); + printf(" %s | IO : %4d.%03d %6d.%03d %4d ",type, + (io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000000 , + ((io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000) % 1000, + (io_sz[type]/TIMER)/1000000, + ((io_sz[type]/TIMER)/1000)%1000, + (io_ct[type] == 0 ? 0 : io_sz[type]/io_ct[type])/1000 ); + printa(" %@8u summary\n", @io_wmx); + printf(" %s | ZFS : %4d.%03d %6d.%03d %4d ",type, + (zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000000, + ((zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000) % 1000, + (zfs_sz[type]/TIMER)/1000000, + ((zfs_sz[type]/TIMER)/1000)%1000, + (zfs_ct[type] == 0 ? 0 : zfs_sz[type]/zfs_ct[type])/1000); + printa(" %@8u summary\n", @zfs_wmx); + printf(" %s | NFS : %4d.%03d %6d.%03d %4d ",type, + (nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000000, + ((nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000) % 1000, + (nfs_sz[type]/TIMER)/1000000, + ((nfs_sz[type]/TIMER)/1000)%1000, + (nfs_ct[type] == 0 ? 0 : nfs_sz[type]/nfs_ct[type])/1000); + printa(" %@8u summary\n", @nfs_wmx); + printf(" %s | TCP : %6d.%03d ",type, + (tcp_sz[type]/TIMER)/1000000, + ((tcp_sz[type]/TIMER)/1000)%1000 + ); + printf(" summary %d\n",tcp_ct[type]/TIMER); + printf(" IOPs %u summary\n", (io_ct["W"]+io_ct["R"])/TIMER); +/* + printa("tcp in (sz) %@d\n",@tcpwsz); + printa("tcp out (sz) %@d\n",@tcprsz); +*/ + type="R"; + trunc(@tcpwsz); + trunc(@tcprsz); + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + type="W"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + ticks= TIMER; + title--; + clear(@nfs_rmx); + clear(@nfs_wmx); + clear(@zfs_rmx); + clear(@zfs_wmx); + clear(@io_rmx); + clear(@io_wmx); +} + +/* use if you want to print something every TITLE lines */ +profile:::tick-1sec / title <= 0 / { title=TITLE; } \ No newline at end of file diff --git a/nfsrpcattr.d b/nfsrpcattr.d new file mode 100644 index 0000000..b32531f --- /dev/null +++ b/nfsrpcattr.d @@ -0,0 +1,31 @@ +syscall:::entry +{ + + self->syscallname = probefunc; +} + +syscall:::return +{ + + self->syscallname = ""; +} + +nfsclient::: +/self->syscallname != 0 && self->syscallname != ""/ +{ + + trace(probemod); + trace(arg0); + trace(execname); + trace(self->syscallname); +} + +nfsclient::: +/self->syscallname == 0 || self->syscallname == ""/ +{ + + trace(probemod); + trace(arg0); + trace(execname); + trace("-"); +} diff --git a/nfsv3errors.d b/nfsv3errors.d new file mode 100644 index 0000000..e625cf9 --- /dev/null +++ b/nfsv3errors.d @@ -0,0 +1,70 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + /* See NFS3ERR_* in /usr/include/nfs/nfs.h */ + nfs3err[0] = "NFS3_OK"; + nfs3err[1] = "PERM"; + nfs3err[2] = "NOENT"; + nfs3err[5] = "IO"; + nfs3err[6] = "NXIO"; + nfs3err[13] = "ACCES"; + nfs3err[17] = "EXIST"; + nfs3err[18] = "XDEV"; + nfs3err[19] = "NODEV"; + nfs3err[20] = "NOTDIR"; + nfs3err[21] = "ISDIR"; + nfs3err[22] = "INVAL"; + nfs3err[27] = "FBIG"; + nfs3err[28] = "NOSPC"; + nfs3err[30] = "ROFS"; + nfs3err[31] = "MLINK"; + nfs3err[63] = "NAMETOOLONG"; + nfs3err[66] = "NOTEMPTY"; + nfs3err[69] = "DQUOT"; + nfs3err[70] = "STALE"; + nfs3err[71] = "REMOTE"; + nfs3err[10001] = "BADHANDLE"; + nfs3err[10002] = "NOT_SYNC"; + nfs3err[10003] = "BAD_COOKIE"; + nfs3err[10004] = "NOTSUPP"; + nfs3err[10005] = "TOOSMALL"; + nfs3err[10006] = "SERVERFAULT"; + nfs3err[10007] = "BADTYPE"; + nfs3err[10008] = "JUKEBOX"; + + printf(" %-18s %5s %-12s %-16s %s\n", "NFSv3 EVENT", "ERR", "CODE", + "CLIENT", "PATHNAME"); +} + +nfsv3:nfssrv::op-commit-done, +nfsv3:nfssrv::op-pathconf-done, +nfsv3:nfssrv::op-fsinfo-done, +nfsv3:nfssrv::op-fsstat-done, +nfsv3:nfssrv::op-readdirplus-done, +nfsv3:nfssrv::op-readdir-done, +nfsv3:nfssrv::op-link-done, +nfsv3:nfssrv::op-rename-done, +nfsv3:nfssrv::op-rmdir-done, +nfsv3:nfssrv::op-remove-done, +nfsv3:nfssrv::op-mknod-done, +nfsv3:nfssrv::op-symlink-done, +nfsv3:nfssrv::op-mkdir-done, +nfsv3:nfssrv::op-create-done, +nfsv3:nfssrv::op-write-done, +nfsv3:nfssrv::op-read-done, +nfsv3:nfssrv::op-readlink-done, +nfsv3:nfssrv::op-access-done, +nfsv3:nfssrv::op-lookup-done, +nfsv3:nfssrv::op-setattr-done, +nfsv3:nfssrv::op-getattr-done +/args[2]->status != 0/ +{ + this->err = args[2]->status; + this->str = nfs3err[this->err] != NULL ? nfs3err[this->err] : "?"; + printf(" %-18s %5d %-12s %-16s %s\n", probename, this->err, + this->str, args[0]->ci_remote, args[1]->noi_curpath); +} diff --git a/nfsv3io.d b/nfsv3io.d new file mode 100644 index 0000000..3406d04 --- /dev/null +++ b/nfsv3io.d @@ -0,0 +1,48 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + interval = 5; + printf("Tracing... Interval %d secs.\n", interval); + tick = interval; +} + +nfsv3:::op-* +{ + @ops[args[0]->ci_remote] = count(); +} + +nfsv3:::op-read-done +{ + @reads[args[0]->ci_remote] = count(); + @readbytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len); +} + + +nfsv3:::op-write-done +{ + @writes[args[0]->ci_remote] = count(); + @writebytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.count); +} + +profile:::tick-1sec +/tick-- == 0/ +{ + normalize(@ops, interval); + normalize(@reads, interval); + normalize(@writes, interval); + normalize(@writebytes, 1024 * interval); + normalize(@readbytes, 1024 * interval); + printf("\n %-32s %6s %6s %6s %6s %8s\n", "Client", "r/s", "w/s", + "kr/s", "kw/s", "ops/s"); + printa(" %-32s %@6d %@6d %@6d %@6d %@8d\n", @reads, @writes, + @readbytes, @writebytes, @ops); + trunc(@ops); + trunc(@reads); + trunc(@writes); + trunc(@readbytes); + trunc(@writebytes); + tick = interval; +} \ No newline at end of file diff --git a/nfsv3ops.d b/nfsv3ops.d new file mode 100644 index 0000000..93f7467 --- /dev/null +++ b/nfsv3ops.d @@ -0,0 +1,21 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + trace("Tracing... Interval 5 secs.\n"); +} + +nfsv3:::op-* +{ + @ops[args[0]->ci_remote, probename] = count(); +} + +profile:::tick-5sec, +dtrace:::END +{ + printf("\n %-32s %-28s %8s\n", "Client", "Operation", "Count"); + printa(" %-32s %-28s %@8d\n", @ops); + trunc(@ops); +} \ No newline at end of file diff --git a/nfsv3rwsnoop-2.d b/nfsv3rwsnoop-2.d new file mode 100644 index 0000000..6a1b172 --- /dev/null +++ b/nfsv3rwsnoop-2.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option switchrate=10hz +dtrace:::BEGIN +{ + printf("%16s\t%20s\t%-18s\t%2s\t%-10s\t%6s\t%s\t%s\n", "TIME(us)", "TIME", + "CLIENT", "OP", "OFFSET(KB)", "BYTES", "4K-ALIGNED", "4K-SIZE"); +} +nfsv3:::op-read-start +{ + printf("%-16d\t%Y\t%-18s\t%2s\t%-10d\t%6d\t%s\t%s\n", + timestamp / 1000, walltimestamp, + args[0]->ci_remote, "R", args[2]->offset / 1024, + args[2]->count, + (args[2]->offset % 4096) == 0 ? "1" : "0", + (args[2]->count % 4096) == 0 ? "1" : "0"); +} +nfsv3:::op-write-start +{ + printf("%-16d\t%Y\t%-18s\t%2s\t%-10d\t%6d\t%s\t%s\n", + timestamp / 1000, walltimestamp, + args[0]->ci_remote, "W", args[2]->offset / 1024, + args[2]->data.data_len, + (args[2]->offset % 4096) == 0 ? "1" : "0", + (args[2]->data.data_len % 4096) == 0 ? "1" : "0"); +} + diff --git a/processtrace.d b/processtrace.d new file mode 100644 index 0000000..b3db9a6 --- /dev/null +++ b/processtrace.d @@ -0,0 +1,30 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10 + +proc:::create +{ + printf("%s %d %d 0 0 %s fork - - %d\n", zonename, pid, args[0]->pr_pid, + execname, timestamp); + start[args[0]->pr_pid] = timestamp; +} + +proc:::exec-success +{ + printf("%s %d %d 0 0 %s exec - - %d\n", zonename, ppid, pid, + execname, timestamp); +} + +proc:::exit +/(this->s = start[pid])/ +{ + printf("%s %d %d 0 0 %s exit %d ms %d\n", zonename, ppid, pid, + execname, (timestamp - this->s) / 1000000, timestamp); + start[pid] = 0; +} + +profile:::tick-1s +{ + printf("- - - - - - tick 1s - %d\n", timestamp); +} diff --git a/rwtime.d b/rwtime.d new file mode 100644 index 0000000..af22510 --- /dev/null +++ b/rwtime.d @@ -0,0 +1,12 @@ +syscall::read:entry, +syscall::write:entry +/pid == $1/ +{ + ts[probefunc] = timestamp; +} +syscall::read:return, +syscall::write:return +/pid == $1 && ts[probefunc] != 0/ +{ + printf("%d nsecs", timestamp - ts[probefunc]); +} \ No newline at end of file diff --git a/rwtimesize.d b/rwtimesize.d new file mode 100644 index 0000000..8a851e3 --- /dev/null +++ b/rwtimesize.d @@ -0,0 +1,56 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + trig = args[0]->b_bufsize; +} + +io:::done +/(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) && trig > 0/ +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average read, bytes"] = avg(this->size); + @plots["read I/O, us"] = quantize(this->delta); + @avgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +io:::done +/!(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) && trig > 0/ +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average write, bytes"] = avg(this->size); + @plots["write I/O, us"] = quantize(this->delta); + @avgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +dtrace:::END +/trig > 0/ +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); + trig = 0; +} diff --git a/scsi.d b/scsi.d new file mode 100644 index 0000000..b31a210 --- /dev/null +++ b/scsi.d @@ -0,0 +1,942 @@ +#!/usr/sbin/dtrace -qCs + +/* + * Chris.Gerhard@sun.com + * Joel.Buckley@sun.com + */ + +#pragma ident "@(#)scsi.d 1.18 09/03/20 SMI" + +/* + * SCSI logging via dtrace. + * + * See http://blogs.sun.com/chrisg/tags/scsi.d + * + * Usage: + * + * scsi.d [ @T time ][ hba [ X ]] + * + * With no arguments it logs every scsi packet out and as many returns + * as it can find. + * + * So to trace all the io going via instance 2 qus driver do: + * + * scsi.d qus 2 + * + * To do the same trace for 20 seconds do: + * + * scsi.d @T 20 qus 2 + * + * NOTE: the "@" is used instead of the more traditional "-" as dtrace will + * continue to parse arguments so would see "-T" as an argument to dtrace and + * produce a usage error. + * + * Temporarily, ie I intend to change this in the future, scsi.d is also + * taking some more arguments using the -D options as well. I suspect I will + * get rid of the bizarre @T options above and change to using -D completely + * but that will be some point in the future, if at all. + * + * The new options are: + * + * -D EXECNAME='"foo"' + * Which results scsi.d only reporting IO associated with the application "foo". + * -D PRINT_STACK + * Which results in scsi.d printing a kernel stack trace after every outgoing + * packet. + * -D QUIET + * Which results in none of the packets being printed. Kind of pointless + * without another option. + * -D PERF_REPORT + * Which results in a report of how long IOs took aggregated per HBA useful + * with -D QUIET to get performance statistics. + * -D TARGET_STATS + * aggregate the stats based on the target. + * -D LUN_STATS + * aggregate the stats based on the LUN. Requires TARGET_STATS + * -D DYNVARSIZE + * pass this value to the #pragma D option dynvarsize= option. + * -D HBA + * the name of the HBA we are interested in. + * -D MIN_LBA + * Only report logical blocks over this value + * -D MAX_LBA + * Only IOs to report logical blocks that are less than this value. + * -D REPORT_OVERTIME=N + * Only report IOs that have taken longer than this number of nanoseconds. + * This only stops the printing of the packets not the collection of + * statistics. + * There are some tuning options that take effect only when + * REPORT_OVERTIME is set. These are: + * -D NSPEC=N + * Set the number of speculations to this value. + * -D SPECSIZE=N + * Set the size of the speculaton buffer. This should be 200 * + * the size of NSPEC. + * -D CLEANRATE=N + * Specify the clean rate. + * + * Finally scsi.d will also now accept the dtrace -c and -p options to trace + * just the commands or process given. + * + * Since dtrace does not output in real time it is useful to sort the output + * of the script using sort -n to get the entries in chronological order. + * + * NOTE: This does not directly trace what goes onto the scsi bus or fibre, + * to do so would require this script have knowledge of every HBA that could + * ever be connected to a system. It traces the SCSI packets as they are + * passed from the target driver to the HBA in the SCSA layer and then back + * again. Although to get the packet when it is returned it guesses that the + * packet will be destroyed using scsi_destroy_pkt and not modified before it + * is. So far this has worked but there is no garauntee that it will work for + * all HBAs and target drivers in the future. + * + */ + +/* + * This can be used as a framework to report on any rountine that has + * a scsi_pkt passed into it. Search for "FRAMEWORK" to see where you + * have to add bits. + */ +#pragma D option quiet +#pragma D option defaultargs + +#ifdef DYNVARSIZE +#pragma D option dynvarsize=DYNVARSIZE +#endif + +#ifdef REPORT_OVERTIME + +#ifdef NSPEC +#pragma D option nspec=NSPEC +#else +#pragma D option nspec=1000 +#endif /* NSPEC */ + + +#ifdef SPECSIZE +#pragma D option specsize=SPECSIZE +#else +#pragma D option specsize=200000 +#endif /* SPECSIZE */ + +#ifdef CLEANRATE +#pragma D option cleanrate=CLEANRATE +#endif /* CLEANRATE */ + +#endif /* REPORT_OVERTIME */ + +/* + * Have to include scsi_types.h so that in releases where scsi_address + * contains a union and #defines for a_target, a_lun & a_sublun they get + * picked up. + */ +#include + +/* + * Now work around CR 6798038. Depending on how that CR is fixed this + * workaround may have to be revisited so that the script will work in + * all cases. + */ +#ifdef a_lun +#undef a_lun +#define my_alun a.spi.a_lun +#else +#define my_alun a_lun +#endif + +#ifdef a_target +#undef a_target +#define my_atarget a.spi.a_target +#else +#define my_atarget a_target +#endif + +/* + * Just for completeness. The script does not use sublun so these are not + * needed but do no harm. + */ +#ifdef a_sublun +#undef a_sublun +#define my_asublun a.spi.a_sublun +#else +#define my_asublun a_sublun +#endif + +/* + * Set both lba & len to 64Bit values, to avoid type mismatch later... + * + * The following does not function due to DTrace insufficient registers... + * + * #define INT64(A, B, C, D, E, F, G, H) ( \ + * (((A) & 0x0ffL) << 56) | \ + * (((B) & 0x0ffL) << 48) | \ + * (((C) & 0x0ffL) << 40) | \ + * (((D) & 0x0ffL) << 32) | \ + * (((E) & 0x0ffL) << 24) | \ + * (((F) & 0x0ffL) << 16) | \ + * (((G) & 0x0ffL) << 8) | \ + * ((H) & 0x0ffL)) + * + * Instead, calculate lower & upper values, then pass them into this macro: + */ +#define INT64(A, B) ( \ + (((A) & 0x0ffffffffLL) << 32) | \ + ((B) & 0x0ffffffffL)) + +#define INT32(A, B, C, D) ( \ + (((A) & 0x0ffL) << 24) | \ + (((B) & 0x0ffL) << 16) | \ + (((C) & 0x0ffL) << 8) | \ + ((D) & 0x0ffL)) +#define INT16(A, B) ( \ + (((A) & 0x0ffL) << 8) | \ + ((B) & 0x0ffL)) +#define INT8(A) ( \ + ((A) & 0x0ffL)) + +/* #define A_TO_TRAN(ap) ((ap)->a_hba_tran) */ +#define P_TO_TRAN(pkt) ((pkt)->pkt_address.a_hba_tran) +/* #define P_TO_ADDR(pkt) (&((pkt)->pkt_address)) */ +#define P_TO_DEVINFO(pkt) ((struct dev_info *)(P_TO_TRAN(pkt)->tran_hba_dip)) + +#define DEV_NAME(pkt) \ + stringof(`devnamesp[P_TO_DEVINFO(pkt)->devi_major].dn_name) /* ` */ + +#define DEV_INST(pkt) (P_TO_DEVINFO(pkt)->devi_instance) + +#ifdef MIN_BLOCK +#define MIN_TEST && this->lba >= (MIN_BLOCK) +#else +#define MIN_TEST +#endif +#ifdef MAX_BLOCK +#define MAX_TEST && (this->lba <= ((MAX_BLOCK) + this->len)) +#else +#define MAX_TEST +#endif + +#define ARGUMENT_TEST (this->pkt && (hba_set == 0 || \ + hba == DEV_NAME(this->pkt) && \ + (inst_set == 0 || inst == DEV_INST(this->pkt))) MIN_TEST MAX_TEST) + +/* + * PRINT_DEV_FROM_PKT + * + * From a scsi_pkt get the name of the driver and the instance of the + * hba. + */ +#define PRINT_DEV_FROM_PKT(pkt) \ + printf("%s%d:", DEV_NAME(pkt), DEV_INST(pkt)) + +#define PRINT_TIMESTAMP() printf("%5.5d.%9.9d ", \ + (timestamp - script_start_time)/1000000000, \ + (timestamp - script_start_time)%1000000000); + +/* + * Look away now. + * + * This is just hackery to get around me being so stubborn I + * don't want to wrap this in a shell script. + */ +#ifdef HBA +BEGIN { + hba = HBA; + hba_set = 1; + timeout = 0; + inst_set = 0; + inst = 0; +} +#else + +#define TIMEOUT_SELECT "@T" + +#define DO_ARGS(N, NS, M, MS) \ +BEGIN \ +/ hba_set == 0 && NS != "" && NS != TIMEOUT_SELECT / \ +{ \ + hba = NS; \ + hba_set = 1; \ +} \ +BEGIN \ +/ hba_set == 1 && inst_set == 0 && MS != "" / \ +{ \ + inst = M; \ + inst_set = 1; \ +} \ + +BEGIN +/ $$1 == TIMEOUT_SELECT / +{ + timeout = $2; +} + +#endif + +BEGIN +/ $$target != "" / +{ + target = $target; +} + +BEGIN +{ +#ifdef EXECNAME + scsi_d_target_execname = EXECNAME; +#else + scsi_d_target_execname = ""; +#endif +} +#ifndef HBA +DO_ARGS($1, $$1, $2, $$2) +DO_ARGS($3, $$3, $4, $$4) +#endif + +/* + * You can open your eyes again now. + */ + +BEGIN { +#ifdef REPORT_OVERTIME + printf("Only reporting IOs longer than %dns\n", REPORT_OVERTIME); +#endif + printf("Hit Control C to interrupt\n"); + script_start_time = timestamp; + end_time = timestamp + (timeout * 1000000000); + + scsi_ops[0x000, 0x0] = "TEST_UNIT_READY"; + scsi_ops[0x001, 0x0] = "REZERO_UNIT_or_REWIND"; + scsi_ops[0x003, 0x0] = "REQUEST_SENSE"; + scsi_ops[0x004, 0x0] = "FORMAT_UNIT"; + scsi_ops[0x005, 0x0] = "READ_BLOCK_LIMITS"; + scsi_ops[0x006, 0x0] = "Unknown(06)"; + scsi_ops[0x007, 0x0] = "REASSIGN_BLOCKS"; + scsi_ops[0x008, 0x0] = "READ(6)"; + scsi_ops[0x009, 0x0] = "Unknown(09)"; + scsi_ops[0x00a, 0x0] = "WRITE(6)"; + scsi_ops[0x00b, 0x0] = "SEEK(6)"; + scsi_ops[0x00c, 0x0] = "Unknown(0c)"; + scsi_ops[0x00d, 0x0] = "Unknown(0d)"; + scsi_ops[0x00e, 0x0] = "Unknown(0e)"; + scsi_ops[0x00f, 0x0] = "READ_REVERSE"; + scsi_ops[0x010, 0x0] = "WRITE_FILEMARK"; + scsi_ops[0x011, 0x0] = "SPACE"; + scsi_ops[0x012, 0x0] = "INQUIRY"; + scsi_ops[0x013, 0x0] = "VERIFY"; + scsi_ops[0x014, 0x0] = "Unknown(14)"; + scsi_ops[0x015, 0x0] = "MODE_SELECT(6)"; + scsi_ops[0x016, 0x0] = "RESERVE(6)"; + scsi_ops[0x017, 0x0] = "RELEASE(6)"; + scsi_ops[0x018, 0x0] = "COPY"; + scsi_ops[0x019, 0x0] = "ERASE(6)"; + scsi_ops[0x01a, 0x0] = "MODE_SENSE(6)"; + scsi_ops[0x01b, 0x0] = "START_STOP_UNIT"; + scsi_ops[0x01c, 0x0] = "RECIEVE_DIAGNOSTIC_RESULTS"; + scsi_ops[0x01d, 0x0] = "SEND_DIAGNOSTIC"; + scsi_ops[0x01e, 0x0] = "PREVENT_ALLOW_MEDIUM_REMOVAL"; + scsi_ops[0x01f, 0x0] = "Unknown(1f)"; + scsi_ops[0x020, 0x0] = "Unknown(20)"; + scsi_ops[0x021, 0x0] = "Unknown(21)"; + scsi_ops[0x022, 0x0] = "Unknown(22)"; + scsi_ops[0x023, 0x0] = "READ_FORMAT_CAPACITY"; + scsi_ops[0x024, 0x0] = "Unknown(24)"; + scsi_ops[0x025, 0x0] = "READ_CAPACITY(10)"; + scsi_ops[0x026, 0x0] = "Unknown(26)"; + scsi_ops[0x027, 0x0] = "Unknown(27)"; + scsi_ops[0x028, 0x0] = "READ(10)"; + scsi_ops[0x02a, 0x0] = "WRITE(10)"; + scsi_ops[0x02b, 0x0] = "SEEK(10)_or_LOCATE(10)"; + scsi_ops[0x02e, 0x0] = "WRITE_AND_VERIFY(10)"; + scsi_ops[0x02f, 0x0] = "VERIFY(10)"; + scsi_ops[0x030, 0x0] = "SEARCH_DATA_HIGH"; + scsi_ops[0x031, 0x0] = "SEARCH_DATA_EQUAL"; + scsi_ops[0x032, 0x0] = "SEARCH_DATA_LOW"; + scsi_ops[0x033, 0x0] = "SET_LIMITS(10)"; + scsi_ops[0x034, 0x0] = "PRE-FETCH(10)"; + scsi_ops[0x035, 0x0] = "SYNCHRONIZE_CACHE(10)"; + scsi_ops[0x036, 0x0] = "LOCK_UNLOCK_CACHE(10)"; + scsi_ops[0x037, 0x0] = "READ_DEFECT_DATA(10)"; + scsi_ops[0x039, 0x0] = "COMPARE"; + scsi_ops[0x03a, 0x0] = "COPY_AND_WRITE"; + scsi_ops[0x03b, 0x0] = "WRITE_BUFFER"; + scsi_ops[0x03c, 0x0] = "READ_BUFFER"; + scsi_ops[0x03e, 0x0] = "READ_LONG"; + scsi_ops[0x03f, 0x0] = "WRITE_LONG"; + scsi_ops[0x040, 0x0] = "CHANGE_DEFINITION"; + scsi_ops[0x041, 0x0] = "WRITE_SAME(10)"; + scsi_ops[0x04c, 0x0] = "LOG_SELECT"; + scsi_ops[0x04d, 0x0] = "LOG_SENSE"; + scsi_ops[0x050, 0x0] = "XDWRITE(10)"; + scsi_ops[0x051, 0x0] = "XPWRITE(10)"; + scsi_ops[0x052, 0x0] = "XDREAD(10)"; + scsi_ops[0x053, 0x0] = "XDWRITEREAD(10)"; + scsi_ops[0x055, 0x0] = "MODE_SELECT(10)"; + scsi_ops[0x056, 0x0] = "RESERVE(10)"; + scsi_ops[0x057, 0x0] = "RELEASE(10)"; + scsi_ops[0x05a, 0x0] = "MODE_SENSE(10)"; + scsi_ops[0x05e, 0x0] = "PERSISTENT_RESERVE_IN"; + scsi_ops[0x05f, 0x0] = "PERSISTENT_RESERVE_OUT"; + scsi_ops[0x07f, 0x0] = "Variable_Length_CDB"; + scsi_ops[0x07f, 0x3] = "XDREAD(32)"; + scsi_ops[0x07f, 0x4] = "XDWRITE(32)"; + scsi_ops[0x07f, 0x6] = "XPWRITE(32)"; + scsi_ops[0x07f, 0x7] = "XDWRITEREAD(32)"; + scsi_ops[0x07f, 0x9] = "READ(32)"; + scsi_ops[0x07f, 0xb] = "WRITE(32)"; + scsi_ops[0x07f, 0xa] = "VERIFY(32)"; + scsi_ops[0x07f, 0xc] = "WRITE_AND_VERIFY(32)"; + scsi_ops[0x080, 0x0] = "XDWRITE_EXTENDED(16)"; + scsi_ops[0x081, 0x0] = "REBUILD(16)"; + scsi_ops[0x082, 0x0] = "REGENERATE(16)"; + scsi_ops[0x083, 0x0] = "EXTENDED_COPY"; + scsi_ops[0x086, 0x0] = "ACCESS_CONTROL_IN"; + scsi_ops[0x087, 0x0] = "ACCESS_CONTROL_OUT"; + scsi_ops[0x088, 0x0] = "READ(16)"; + scsi_ops[0x08a, 0x0] = "WRITE(16)"; + scsi_ops[0x08c, 0x0] = "READ_ATTRIBUTES"; + scsi_ops[0x08d, 0x0] = "WRITE_ATTRIBUTES"; + scsi_ops[0x08e, 0x0] = "WRITE_AND_VERIFY(16)"; + scsi_ops[0x08f, 0x0] = "VERIFY(16)"; + scsi_ops[0x090, 0x0] = "PRE-FETCH(16)"; + scsi_ops[0x091, 0x0] = "SYNCHRONIZE_CACHE(16)"; + scsi_ops[0x092, 0x0] = "LOCK_UNLOCK_CACHE(16)_or_LOCATE(16)"; + scsi_ops[0x093, 0x0] = "WRITE_SAME(16)_or_ERASE(16)"; + scsi_ops[0x09e, 0x0] = "SERVICE_IN_or_READ_CAPACITY(16)"; + scsi_ops[0x0a0, 0x0] = "REPORT_LUNS"; + scsi_ops[0x0a3, 0x0] = "MAINTENANCE_IN_or_REPORT_TARGET_PORT_GROUPS"; + scsi_ops[0x0a4, 0x0] = "MAINTENANCE_OUT_or_SET_TARGET_PORT_GROUPS"; + scsi_ops[0x0a7, 0x0] = "MOVE_MEDIUM"; + scsi_ops[0x0a8, 0x0] = "READ(12)"; + scsi_ops[0x0aa, 0x0] = "WRITE(12)"; + scsi_ops[0x0ae, 0x0] = "WRITE_AND_VERIFY(12)"; + scsi_ops[0x0af, 0x0] = "VERIFY(12)"; + scsi_ops[0x0b3, 0x0] = "SET_LIMITS(12)"; + scsi_ops[0x0b4, 0x0] = "READ_ELEMENT_STATUS"; + scsi_ops[0x0b7, 0x0] = "READ_DEFECT_DATA(12)"; + scsi_ops[0x0ba, 0x0] = "REDUNDANCY_GROUP_IN"; + scsi_ops[0x0bb, 0x0] = "REDUNDANCY_GROUP_OUT"; + scsi_ops[0x0bc, 0x0] = "SPARE_IN"; + scsi_ops[0x0bd, 0x0] = "SPARE_OUT"; + scsi_ops[0x0be, 0x0] = "VOLUME_SET_IN"; + scsi_ops[0x0bf, 0x0] = "VOLUME_SET_OUT"; + scsi_ops[0x0d0, 0x0] = "EXPLICIT_LUN_FAILOVER"; + scsi_ops[0x0f1, 0x0] = "STOREDGE_CONTROLLER"; + + scsi_reasons[0] = "COMPLETED"; + scsi_reasons[1] = "INCOMPLETE"; + scsi_reasons[2] = "DMA_ERR"; + scsi_reasons[3] = "TRAN_ERR"; + scsi_reasons[4] = "RESET"; + scsi_reasons[5] = "ABORTED"; + scsi_reasons[6] = "TIMEOUT"; + scsi_reasons[7] = "DATA_OVERRUN"; + scsi_reasons[8] = "COMMAND_OVERRUN"; + scsi_reasons[9] = "STATUS_OVERRUN"; + scsi_reasons[10] = "Bad_Message"; + scsi_reasons[11] = "No_Message_Out"; + scsi_reasons[12] = "XID_Failed"; + scsi_reasons[13] = "IDE_Failed"; + scsi_reasons[14] = "Abort_Failed"; + scsi_reasons[15] = "Reject_Failed"; + scsi_reasons[16] = "Nop_Failed"; + scsi_reasons[17] = "Message_Parity_Error_Failed"; + scsi_reasons[18] = "Bus_Device_Reset_Failed"; + scsi_reasons[19] = "Identify_Message_Rejected"; + scsi_reasons[20] = "Unexpected_Bus_free"; + scsi_reasons[21] = "Tag_Rejected"; + scsi_reasons[22] = "TERMINATED"; + scsi_reasons[24] = "Device_Gone"; + + scsi_state[0] = "Success"; + scsi_state[0x2] = "Check_Condition"; + scsi_state[0x4] = "Condition_Met"; + scsi_state[0x8] = "Busy"; + scsi_state[0x10] = "Intermidiate"; + scsi_state[0x14] = "Intermidiate-Condition_Met"; + scsi_state[0x18] = "Reservation_Conflict"; + scsi_state[0x22] = "Command_Terminated"; + scsi_state[0x28] = "Queue_Full"; + scsi_state[0x30] = "ACA_Active"; + + +/* FRAMEWORK:- add your special string to this array */ + names[0] = "->"; /* Command sent */ + names[1] = "<-"; /* Command returning */ + names[2] = "ILL"; /* Illegal Request spotted */ +} + +fbt:scsi:scsi_transport:entry, +fbt:scsi:scsi_destroy_pkt:entry +/ timeout != 0 && end_time < timestamp / +{ + exit(0); +} + +/* + * relying on scsi_destroy_pkt to get the response is a + * hack that may or may not always work depending on HBA + * and target drivers. + * With arc case: PSARC/2009/033 I can use scsi_hba_pkt_comp(). When that + * is out in the wild it should be used. + */ +/* + * If in any of these probes this->pkt is initialized to zero + * the knock on effect is none of the other probes for this clause further + * down get run. + * + * Do not just comment the probes out. Clause local variables are not + * initialized by default so you will see run time dtrace errors if you do + * where other variables do not get set. + */ + +#define PROBE_SWITCH \ + ((target == 0 || pid == target ) && \ + (scsi_d_target_execname == "" || scsi_d_target_execname == execname)) + +fbt:scsi:scsi_transport:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:scsi:scsi_transport:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +fbt:scsi:scsi_destroy_pkt:entry +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +/* + * FRAMEWORK:- create your entry probe and make this->pkt point to the + * argument that has the scsi_pkt. eg add one like this: + * + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:*sd:*sd_sense_key_illegal_request:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg3; +} + +/* FRAMEWORK:- Add your probe name to the list for CDB_PROBES */ +#define CDB_PROBES \ +fbt:scsi:scsi_transport:entry, \ +fbt:*sd:*sd_sense_key_illegal_request:entry + +#define ENTRY_PROBES \ +CDB_PROBES, \ +fbt:scsi:scsi_destroy_pkt:entry + +ENTRY_PROBES +/ this->pkt / +{ + this->cdb = (uchar_t *)this->pkt->pkt_cdbp; + this->scb = (uchar_t *)this->pkt->pkt_scbp; + this->group = ((this->cdb[0] & 0xe0) >> 5); + this->lbalen = 0; +} +/* + * 5.11 allows this->name = "->" but vanilla 5.10 does not so I'm sticking + * to the setting a variable and then choosing the string from the global + * array. + */ +fbt:scsi:scsi_transport:entry +/ this->pkt / +{ + start_time[this->scb] = timestamp; + this->name = 0; +} + +/* + * If we are filtering on target pid or execname then only report + * the return packets that we know we sent. + */ +fbt:scsi:scsi_destroy_pkt:entry +/ start_time[this->scb] == 0 && ( target != 0 || scsi_d_target_execname != "") / +{ + this->pkt = 0; +} + +fbt:scsi:scsi_destroy_pkt:entry +/ this->pkt / +{ + this->name = 1; +} +/* + * FRAMEWORK: Add your probe and set this->name to the offset of + * your string in the names table. + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ this->pkt / +{ + this->name = 2; +} + +/* + * Now cope with the differnent CDB layouts based on the group. + * + * Trying to use x = this->group == 0 ? y : z; results in D running out + * of registers so it is all done using seperate probes. + * + * Group Listing: + * + Group 0: 6Byte CDBs: scsi_ops[0x000] thru scsi_ops[0x01f] + * + Group 1: 10Byte CDBs: scsi_ops[0x020] thru scsi_ops[0x03f] + * + Group 2: 10Byte CDBs: scsi_ops[0x040] thru scsi_ops[0x05f] + * + Group 3: Variable Length CDBs: scsi_ops[0x060] thru scsi_ops[0x07f] + * + Group 4: 16Byte CDBs: scsi_ops[0x080] thru scsi_ops[0x09f] + * + Group 5: 12Byte CDBs: scsi_ops[0x0a0] thru scsi_ops[0x0bf] + * Group 6: Vendor Specific CDBs: scsi_ops[0x0c0] thru scsi_ops[0x0df] + * Group 7: Vendor Specific CDBs: scsi_ops[0x0e0] thru scsi_ops[0x0ff] + * + * The groups with a leading plus sign "+" are of importance. + */ +ENTRY_PROBES +/ this->pkt && this->group == 0 / +{ + this->lba = INT32(0, (this->cdb[1] & 0x1f), + this->cdb[2], this->cdb[3]); + this->lbalen = 6; + this->len = INT8(this->cdb[4]); + this->control = this->cdb[5]; + this->sa = 0; + this->cdblen = 6; +} + +ENTRY_PROBES +/ this->pkt && this->group == 1 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 2 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 3 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[12], this->cdb[13], + this->cdb[14], this->cdb[15]); + this->lbaLower = INT32(this->cdb[16], this->cdb[17], + this->cdb[18], this->cdb[19]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[28], this->cdb[29], + this->cdb[30], this->cdb[31]); + this->control = this->cdb[1]; + this->sa = INT16(this->cdb[8], this->cdb[9]); + this->cdblen = 32 +} + +ENTRY_PROBES +/ this->pkt && this->group == 4 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbaLower = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[10], this->cdb[11], + this->cdb[12], this->cdb[13]); + this->control = this->cdb[15]; + this->sa = 0; + this->cdblen = 16; +} + +ENTRY_PROBES +/ this->pkt && this->group == 5 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->control = this->cdb[11]; + this->sa = 0; + this->cdblen = 12; +} +/* + * We don't know the format of the group 6 and 7 commands as they are + * vendor specific. So I just print the cdb up to a maximum of 32 bytes. + */ +ENTRY_PROBES +/ this->pkt && this->group > 5 / +{ + this->lba = 0; + this->lbalen = 0; + this->len = 0; + this->control = 0; + this->sa = 0; +/* + * At some point pkt_cdblen will make it into 5.10 but I can't at this moment + * workout how I can test this before the script compiles (without wrapping + * this in a ksh script which as I have mentioned above I'm not going to do). + */ +#ifndef __SunOS_5_10 + this->cdblen = this->pkt->pkt_cdblen; +#else + this->cdblen = 32; +#endif + +} + +/* + * The guts of the script. Report what we have if we are required. + * First the stuff we do for both outgoing and incoming. + */ + +ENTRY_PROBES +/ !ARGUMENT_TEST / +{ + this->arg_test_passed = 0; +} + +ENTRY_PROBES +/ ARGUMENT_TEST / +{ + this->arg_test_passed = 1; +} + +/* + * If only reporting IO that takes more then a certan time then use + * speculations to capture the outgoing data and then only commit the + * speculation if the packet takes too long. + */ + +#ifdef REPORT_OVERTIME + +#define SPECULATE speculate(specs[this->scb]); + +ENTRY_PROBES +/ this->arg_test_passed && specs[this->scb] == 0 / +{ + specs[this->scb] = speculation(); +} + +#else + +#define SPECULATE + +#endif /* REPORT_OVERTIME */ + +#ifndef QUIET +ENTRY_PROBES +/ this->arg_test_passed / +{ + SPECULATE + PRINT_TIMESTAMP(); + PRINT_DEV_FROM_PKT(this->pkt); + printf("%s 0x%2.2x %9s address %2.2d:%2.2d, lba 0x%*.*x, ", + names[this->name], + this->cdb[0], + scsi_ops[(uint_t)this->cdb[0], this->sa] != 0 ? + scsi_ops[(uint_t)this->cdb[0], this->sa] : "Unknown", + this->pkt->pkt_address.my_atarget, + this->pkt->pkt_address.my_alun, + this->lbalen, this->lbalen, + this->lba); + printf("len 0x%6.6x, control 0x%2.2x timeout %d CDBP %p", + this->len, this->control, + this->pkt->pkt_time, this->cdb); +} + +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(" %d %s(%d)", this->arg_test_passed, execname, pid); +} + +/* + * For those who are just not happy without some raw hex numbers. Print the + * raw cdb. + */ +CDB_PROBES +/ this->arg_test_passed == 1 && this->cdblen / +{ + SPECULATE + printf(" cdb(%d) ", this->cdblen); +} + +/* + * Now print each byte out of the cdb. + */ + +#define PRINT_CDB(N) \ +CDB_PROBES \ +/ this->arg_test_passed == 1 && this->cdblen > N/ \ +{ \ + SPECULATE; \ + printf("%2.2x", this->cdb[N]) \ +} +#else /* QUIET */ +#define PRINT_CDB(N) +#endif /* QUIET */ + +PRINT_CDB(0) +PRINT_CDB(1) +PRINT_CDB(2) +PRINT_CDB(3) +PRINT_CDB(4) +PRINT_CDB(5) +PRINT_CDB(6) +PRINT_CDB(7) +PRINT_CDB(8) +PRINT_CDB(9) +PRINT_CDB(10) +PRINT_CDB(11) +PRINT_CDB(12) +PRINT_CDB(13) +PRINT_CDB(14) +PRINT_CDB(15) +PRINT_CDB(16) +PRINT_CDB(17) +PRINT_CDB(18) +PRINT_CDB(19) +PRINT_CDB(20) +PRINT_CDB(21) +PRINT_CDB(22) +PRINT_CDB(23) +PRINT_CDB(24) +PRINT_CDB(25) +PRINT_CDB(26) +PRINT_CDB(27) +PRINT_CDB(28) +PRINT_CDB(29) +PRINT_CDB(30) +PRINT_CDB(31) + +/* + * Now the result on the incoming. + */ +#ifdef PERF_REPORT +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && start_time[this->scb] != 0 / +{ + @[DEV_NAME(this->pkt), DEV_INST(this->pkt) +#ifdef TARGET_STATS + , this->pkt->pkt_address.a_target +#ifdef LUN_STATS + , this->pkt->pkt_address.a_lun +#endif +#endif + ] = quantize(timestamp - start_time[this->scb]); +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + this->state = *(this->scb); + this->state = this->state & 0x3E; +} +#ifndef QUIET +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(", reason 0x%x (%s) pkt_state 0x%x state 0x%x %s Time %dus\n", + this->pkt->pkt_reason, + scsi_reasons[this->pkt->pkt_reason] != 0 ? + scsi_reasons[this->pkt->pkt_reason] : "Unknown", + this->pkt->pkt_state, this->state, scsi_state[this->state] != 0 ? + scsi_state[this->state] : "Unknown", + start_time[this->scb] != 0 ? + (timestamp - start_time[this->scb])/1000 : 0); +} +#endif + +#ifndef QUIET +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf("\n"); +} +#endif +/* + * printing stacks of where we are called from can be useful however it + * does mean you can't just pipe the output through sort(1). Does not work + * with overtime as you can't put a stack into a speculation. + */ +#ifdef PRINT_STACK +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + stack(10); +} +#endif + +#ifdef REPORT_OVERTIME +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +((timestamp - start_time[this->scb]) > REPORT_OVERTIME && start_time[this->scb]) / +{ + SPECULATE + commit(specs[this->scb]); + specs[this->scb] = 0; +} +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +! ((timestamp - start_time[this->scb]) > REPORT_OVERTIME) / +{ + discard(specs[this->scb]); + specs[this->scb] = 0; +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + start_time[this->scb] = 0; +} + diff --git a/scsilatency.d b/scsilatency.d new file mode 100644 index 0000000..8e95073 --- /dev/null +++ b/scsilatency.d @@ -0,0 +1,58 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +string scsi_cmd[uchar_t]; + +dtrace:::BEGIN +{ + /* See /usr/include/sys/scsi/generic/commands.h for the full list. */ + scsi_cmd[0x00] = "test_unit_ready"; + scsi_cmd[0x08] = "read"; + scsi_cmd[0x0a] = "write"; + scsi_cmd[0x12] = "inquiry"; + scsi_cmd[0x17] = "release"; + scsi_cmd[0x1a] = "mode_sense"; + scsi_cmd[0x1b] = "load/start/stop"; + scsi_cmd[0x1c] = "get_diagnostic_results"; + scsi_cmd[0x1d] = "send_diagnostic_command"; + scsi_cmd[0x25] = "read_capacity"; + scsi_cmd[0x28] = "read(10)"; + scsi_cmd[0x2a] = "write(10)"; + scsi_cmd[0x35] = "synchronize_cache"; + scsi_cmd[0x4d] = "log_sense"; + scsi_cmd[0x5e] = "persistent_reserve_in"; + scsi_cmd[0xa0] = "report_luns"; + + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::scsi_transport:entry +{ + start[arg0] = timestamp; +} + +fbt::scsi_destroy_pkt:entry +/start[arg0]/ +{ + this->delta = (timestamp - start[arg0]) / 1000; + this->code = *args[0]->pkt_cdbp; + this->cmd = scsi_cmd[this->code] != NULL ? + scsi_cmd[this->code] : lltostr(this->code); + this->reason = args[0]->pkt_reason == 0 ? "Success" : + strjoin("Fail:", lltostr(args[0]->pkt_reason)); + + @num[this->cmd, this->reason] = count(); + @average[this->cmd, this->reason] = avg(this->delta); + @total[this->cmd, this->reason] = sum(this->delta); + + start[arg0] = 0; +} + +dtrace:::END +{ + normalize(@total, 1000); + printf("\n %-26s %-12s %11s %11s %11s\n", "SCSI COMMAND", + "COMPLETION", "COUNT", "AVG(us)", "TOTAL(ms)"); + printa(" %-26s %-12s %@11d %@11d %@11d\n", @num, @average, @total); +} \ No newline at end of file diff --git a/sdretry.d b/sdretry.d new file mode 100644 index 0000000..14e812a --- /dev/null +++ b/sdretry.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... output every 10 seconds.\n"); +} + +fbt::sd_set_retry_bp:entry +{ + @[xlate (args[1])->dev_statname, + xlate (args[1])->dev_major, + xlate (args[1])->dev_minor] = count(); +} + +tick-10sec +{ + printf("\n%Y:\n", walltimestamp); + printf("%28s %-3s,%-4s %s\n", "DEVICE", "MAJ", "MIN", "RETRIES"); + printa("%28s %-03d,%-4d %@d\n", @); + trunc(@); +} diff --git a/sdretry1.d b/sdretry1.d new file mode 100644 index 0000000..24fc531 --- /dev/null +++ b/sdretry1.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ +printf("Tracing... output every 10 seconds.\n"); +} + +fbt::sd_set_retry_bp:entry +{ +@[xlate (args[1])->dev_statname, +xlate (args[1])->dev_major, +xlate (args[1])->dev_minor] = count(); +} + +tick-10sec +{ +printf("\n%Y:\n", walltimestamp); +printf("%28s %-3s,%-4s %s\n", "DEVICE", "MAJ", "MIN", "RETRIES"); +printa("%28s %-03d,%-4d %@d\n", @); +trunc(@); +} \ No newline at end of file diff --git a/spa_sync.d b/spa_sync.d new file mode 100644 index 0000000..a3f00c3 --- /dev/null +++ b/spa_sync.d @@ -0,0 +1,48 @@ +#!/usr/sbin/dtrace -qs + +/* + * spa_sync.d - ROCH http://blogs.sun.com/roch/entry/128k_suffice + * mods by benr + * + * Measure I/O throughput as generated by spa_sync + * Between the spa_sync entry and return probe + * I count all I/O and bytes going through bdev_strategy. + * This is a lower bound on what the device can do since + * some aspects of spa_sync are non-concurrent I/Os. + */ + +BEGIN { + tt = 0; /* timestamp */ + b = 0; /* Bytecount */ + cnt = 0; /* iocount */ +} + +spa_sync:entry/(self->t == 0) && (tt == 0)/{ + b = 0; /* reset the I/O byte count */ + cnt = 0; + tt = timestamp; + self->t = 1; + printf("%Y", walltimestamp); +} + +spa_sync:return +/(self->t == 1) && (tt != 0)/ +{ + this->delta = (timestamp-tt); + this->cnt = (cnt == 0) ? 1 : cnt; /* avoid divide by 0 */ + printf("t: %d MB; %d ms of spa_sync; avg sz : %d KB; throughput %d MB/sn", + b / 1048576, + this->delta / 1000000, + b / this->cnt / 1024, + (b * 1000000000) / (this->delta * 1048676)); + tt = 0; + self->t = 0; +} + +/* We only count I/O issued during an spa_sync */ +bdev_strategy:entry +/tt != 0/ +{ + cnt ++; + b += (args[0]->b_bcount); +} \ No newline at end of file diff --git a/spa_sync__.d b/spa_sync__.d new file mode 100644 index 0000000..55ada38 --- /dev/null +++ b/spa_sync__.d @@ -0,0 +1,41 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +inline int MIN_MS = 1; + +dtrace:::BEGIN +{ + printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); + @bytes = sum(0); +} + +fbt::spa_sync:entry +/!self->start/ +{ + in_spa_sync = 1; + self->start = timestamp; + self->spa = args[0]; +} + +io:::start +/in_spa_sync/ +{ + @io = count(); + @bytes = sum(args[0]->b_bcount); +} + +fbt::spa_sync:return +/self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ +{ + normalize(@bytes, 1048576); + printf("%-20Y %-10s %6d ms, ", walltimestamp, + stringof(self->spa->spa_name), this->ms); + printa("%@d MB %@d I/O\n", @bytes, @io); +} + +fbt::spa_sync:return +{ + self->start = 0; self->spa = 0; in_spa_sync = 0; + clear(@bytes); clear(@io); +} diff --git a/spacemapalloc_lat.d b/spacemapalloc_lat.d new file mode 100644 index 0000000..3b5516d --- /dev/null +++ b/spacemapalloc_lat.d @@ -0,0 +1,24 @@ +#!/usr/sbin/dtrace -s + +fbt::space_map_alloc:entry +{ + self->s = arg1; +} + +fbt::space_map_alloc:return +/arg1 != -1/ +{ + self->s = 0; +} + +fbt::space_map_alloc:return +/self->s && (arg1 == -1)/ +{ + @s = quantize(self->s); + self->s = 0; +} + +tick-10s +{ + printa(@s); +} \ No newline at end of file diff --git a/spasync.d b/spasync.d new file mode 100644 index 0000000..4b1236e --- /dev/null +++ b/spasync.d @@ -0,0 +1,52 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=8m + +inline int MIN_MS = 1000; + +dtrace:::BEGIN +{ + printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); + /* @bytes = sum(0); */ +} + +fbt::spa_sync:entry +/!self->start/ +{ + in_spa_sync = 1; + self->start = timestamp; + self->spa = args[0]; + _count = 0; + _bytes = 0; +} + +io:::start +/in_spa_sync/ +{ + /* @io = count(); + @bytes = sum(args[0]->b_bcount); */ + @avg_bytesize = avg(args[0]->b_bcount); + _bytes += (args[0]->b_bcount); + _count ++; +} + +fbt::spa_sync:return +/ in_spa_sync && self->spa && self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ +{ + this->count = (_count == 0) ? 1 : _count; /* We need to avoid division by zero */ + /* normalize(@bytes, 1048576); */ + this->mbytes = (_bytes == 0) ? 1 : _bytes / 1048576; + this->avg_iosize = _bytes / this->count; + printf("%-20Y %-10s %6d ms, ", walltimestamp, + stringof(self->spa->spa_name), this->ms); + /* printa("%@d MB %@d I/O\n", @bytes, @io); */ + printf("Bytes(b): %d, Megabytes(MB): %d, IOP(s): %d, Avg. Size: %d\n", _bytes, this->mbytes, _count, this->avg_iosize); +} + +fbt::spa_sync:return +/self->spa/ +{ + self->start = 0; self->spa = 0; in_spa_sync = 0; _bytes = 0; + /* clear(@bytes); clear(@io); */ +} \ No newline at end of file diff --git a/tcp_rtt.d b/tcp_rtt.d new file mode 100644 index 0000000..795c559 --- /dev/null +++ b/tcp_rtt.d @@ -0,0 +1,33 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +tcp:::send +{ + snxt[args[1]->cs_cid, args[3]->tcps_snxt] = timestamp; + bytesrttttmp[args[1]->cs_cid, args[3]->tcps_snxt] = + args[2]->ip_plength - args[4]->tcp_offset; +} + +tcp:::receive +/ snxt[args[1]->cs_cid, args[4]->tcp_ack] / +{ + @bytesrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + sum(bytesrttttmp[args[1]->cs_cid, args[4]->tcp_ack]); + @meanrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + avg(timestamp - snxt[args[1]->cs_cid, args[4]->tcp_ack]); + @stddevrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + stddev(timestamp - snxt[args[1]->cs_cid, args[4]->tcp_ack]); + @countrtt[args[2]->ip_saddr, args[4]->tcp_sport] = count(); + snxt[args[1]->cs_cid, args[4]->tcp_ack] = 0; + bytesrttttmp[args[1]->cs_cid, args[4]->tcp_ack] = 0; +} + +END +{ + printf("%-20s %-8s %-15s %-15s %-10s %-7s\n", + "Remote host", "Port", "TCP Avg RTT(ns)", "StdDev", "NumBytes", + "NumSegs"); + printa("%-20s %-8d %@-15d %@-15d %@-10d %@-7d\n", @meanrtt, + @stddevrtt, @bytesrtt, @countrtt); +} diff --git a/trace_arc.d b/trace_arc.d new file mode 100644 index 0000000..21143ab --- /dev/null +++ b/trace_arc.d @@ -0,0 +1,240 @@ +#!/usr/sbin/dtrace -s +#pragma D option flowindent + +fbt:zfs:arc_read:entry +/pid == $target/ +{ + self->ts = timestamp; + self->interested = 1; +} + +zfs:: +/self->interested/ +{ +} + +buf_hash_find:entry +/self->interested/ +{ + printf("Spa: %d, DVA %d.%d, birth: %d", args[0], args[1]->dva_word[0], args[1]->dva_word[1], args[2]); +} + +buf_hash_insert:entry +/self->interested/ +{ + /* ARC_IN_HASH_TABLE flag might be set here */ +} + +arc_buf_alloc:entry +/self->interested/ +{ + printf("b_datacnt may be incremented here"); +} + +arc_buf_clone:entry +/self->interested/ +{ + printf("b_datacnt may be incremented here"); +} + +buf_hash_insert:return +/self->interested/ +{ + +} + +arc_released:return +/self->interested/ +{ + trace(args[1]); +} + +dbuf_read_done:entry +/self->interested/ +{ + trace(args[0]->io_error); +} + +dbuf_read_impl:entry +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + +dbuf_read_impl:return +/self->interested/ +{ + +} + +dmu_objset_open_impl:entry +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + +dmu_objset_open_impl:return +/self->interested/ +{ + +} + +arc_read_nolock:entry +/self->interested/ +{ + printf("IMPORTANT - next, figure out who gives us these arc_flags and why? \n"); + printf("arc_flags: %d - ", *args[7]); + printf("%s", (*args[7] & (1 << 1)) ? "ARC_WAIT " : ""); + printf("%s", (*args[7] & (1 << 2)) ? "ARC_NOWAIT " : ""); + printf("%s", (*args[7] & (1 << 3)) ? "ARC_PREFETCH " : ""); + printf("%s", (*args[7] & (1 << 4)) ? "ARC_CACHED " : ""); + printf("%s", (*args[7] & (1 << 5)) ? "ARC_L2CACHE " : ""); + /* ARC_L2CACHE flag might be set here in 3 different spots */ + +} + +arc_read_nolock:return +/self->interested/ +{ + +} + +arc_write:entry +/self->interested/ +{ + +} + +arc_write:return +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + + +arc_get_data_buf:entry +/self->interested/ +{ + printf("When called from within arc_read_nolock, b_datacnt is increased right before calling here. See arc.c line 2827 \n"); + printf("Buf state: %a, Buf size: %d, Buf type: %s", args[0]->b_hdr->b_state, args[0]->b_hdr->b_size, args[0]->b_hdr->b_type == 0 ? "ARC_BUFC_DATA" : "ARC_BUFC_METADATA"); +} + +arc_adapt:entry +/self->interested/ +{ + printf("State: %a, Size: %d", args[1], args[0]); +} + +arc_reclaim_needed:return +/self->interested/ +{ + printf("%s", args[1] == 0 ? "false" : "true"); +} + +arc_change_state:entry +/self->interested/ +{ + printf("New state: %a, Old state: %a, Refcnt: %d, Datacnt: %d, B_size: %d", args[0], args[1]->b_state, args[1]->b_refcnt.rc_count, args[1]->b_datacnt, args[1]->b_size); +} + +zio_buf_alloc:entry +/self->interested/ +{ + trace(args[0]); +} + +arc_space_consume:entry +/self->interested/ +{ + printf("Consuming %d bytes of arc_space_type %s", args[0], args[1] == 0 ? "DATA" : "HDRS, L2HDRS or OTHER"); +} + +dbuf_update_data:entry +/self->interested/ +{ + trace(args[0]->db_level); + trace(args[0]->db_user_data_ptr_ptr); +} + +buf_hash_find:return +/self->interested/ +{ + printf("Datacnt: %d, Hdr_flags: %d", args[1]->b_datacnt, args[1]->b_flags); +} + +arc_access:entry +/self->interested/ +{ + printf("Datacnt: %d,",args[0]->b_datacnt); + printf(" Hdr_flags: %d = ", args[0]->b_flags); + printf("%s", (args[0]->b_flags & (1 << 1)) ? "ARC_WAIT " : ""); + printf("%s", (args[0]->b_flags & (1 << 2)) ? "ARC_NOWAIT " : ""); + printf("%s", (args[0]->b_flags & (1 << 3)) ? "ARC_PREFETCH " : ""); + printf("%s", (args[0]->b_flags & (1 << 4)) ? "ARC_CACHED " : ""); + printf("%s", (args[0]->b_flags & (1 << 5)) ? "ARC_L2CACHE " : ""); + printf("%s", (args[0]->b_flags & (1 << 6)) ? "WTF_6 " : ""); + printf("%s", (args[0]->b_flags & (1 << 7)) ? "WTF_7 " : ""); + printf("%s", (args[0]->b_flags & (1 << 8)) ? "WTF_8 " : ""); + printf("%s", (args[0]->b_flags & (1 << 9)) ? "ARC_IN_HASH_TABLE " : ""); + printf("%s", (args[0]->b_flags & (1 << 10)) ? "ARC_IO_IN_PROGRESS " : ""); + printf("%s", (args[0]->b_flags & (1 << 11)) ? "ARC_IO_ERROR " : ""); + printf("%s", (args[0]->b_flags & (1 << 12)) ? "ARC_FREED_IN_READ " : ""); + printf("%s", (args[0]->b_flags & (1 << 13)) ? "ARC_BUF_AVAILABLE " : ""); + printf("%s", (args[0]->b_flags & (1 << 14)) ? "ARC_INDIRECT " : ""); + printf("%s", (args[0]->b_flags & (1 << 15)) ? "ARC_FREE_IN_PROGRESS " : ""); + printf("%s", (args[0]->b_flags & (1 << 16)) ? "ARC_L2_EVICTING " : ""); + printf("%s", (args[0]->b_flags & (1 << 17)) ? "ARC_L2_EVICTED " : ""); + printf("%s", (args[0]->b_flags & (1 << 18)) ? "ARC_L2_WRITE_HEAD " : ""); + + +} +add_reference:entry +/self->interested/ +{ + trace(args[0]->b_datacnt); + trace(args[0]->b_flags); +} + +fbt:zfs:arc_read:return +/pid == $target/ +{ + printf("Time spent in arc_read: %d ns", timestamp - self->ts); + self->iinterested = 0; + self->ts = NULL; +} + +fbt::arc_evict:entry +/self->interested/ +{ + printf("%Y %-10a %-10s %-10s %d bytes\n", walltimestamp, args[0], + arg4 == 0 ? "data" : "metadata", + arg3 == 0 ? "evict" : "recycle", arg2); + + /* ARC_IN_HASH_TABLE flag might be set here */ + +} + +fbt::arc_buf_evict:entry +/self->interested/ +{ + + /* ARC_IN_HASH_TABLE flag might be set here */ + +} + +fbt:zfs:arc_evict_needed:return +/self->interested/ +{ + printf("%s", args[1] == 0 ? "false" : "true"); +} + +fbt:zfs:buf_hash_remove:entry +/self->interested/ +{ + printf("Removing buffer hash of %a", args[0]->b_state); +} + +fbt:zfs:buf_hash_remove:return +/self->interested/ +{ +} diff --git a/trussrw.d b/trussrw.d new file mode 100644 index 0000000..0b507bd --- /dev/null +++ b/trussrw.d @@ -0,0 +1,31 @@ +/* +* Run script with dtrace -qs +*/ +dtrace:::BEGIN + { + i = 20; + } + + +syscall::read:entry, +syscall::write:entry +/i > 0, pid == $1/ +{ + trace(i--); + printf("\n[%s] , %s, %s(%d, 0x%x, %4d)", execname, probename, probefunc, arg0, arg1, arg2); +} + +syscall::read:return, +syscall::write:return +/pid == $1/ +{ + printf("\t\t = %d\n", arg1); +} + +syscall::read:entry, +syscall::write:entry +/i == 0/ +{ + trace("blastoff!"); + exit(0) +} \ No newline at end of file diff --git a/txg_monitor.d b/txg_monitor.d index 1afe9de..4947f00 100644 --- a/txg_monitor.d +++ b/txg_monitor.d @@ -62,4 +62,4 @@ fbt::spa_sync:return { self->start = 0; self->spa = 0; in_spa_sync = 0; clear(@bytes); clear(@io); clear(@delays); clear(@throttles); trunc(@wrl_min); trunc(@wrl_max); trunc(@reserved_max); -} +} \ No newline at end of file diff --git a/uberupd.d b/uberupd.d new file mode 100644 index 0000000..f22bccd --- /dev/null +++ b/uberupd.d @@ -0,0 +1,38 @@ +#!/usr/sbin/dtrace -s + +/* #pragma D option quiet */ + +/* This section may be of questionable value, but +could be used to determine which block numbers correlate with +which uberblock update events +*/ +io:genunix:default_physio:start, +io:genunix:bdev_strategy:start, +io:genunix:biodone:done +{ + printf ("%d %s Blk Num: %d Byte Count: %d", timestamp, execname, + args[0]->b_blkno, args[0]->b_bcount); +} + +fbt:zfs:uberblock_update:entry +{ + printf ("%d %s, %d, %d, last_sync_TXG: %d, birth_TXG: %d, %d, curr_TXG: %d", timestamp, execname, + pid, args[0]->ub_rootbp.blk_prop, args[0]->ub_txg, + args[0]->ub_rootbp.blk_birth, args[1]->vdev_asize, args[2]); +} + +/* +Watch carefully for the `:return` probe, last arg. of which is a bool., +indicating whether or not uberblock was updated during last TXG. +If the birth_TXG and the curr_TXG match, we know uberblock was updated, +and sure enough the `:return` probe should have its last argument be '1'. + +Example (pay attn. to birth_TXG: 6147837, and curr_TXG: 6147837): + 0 52274 uberblock_update:entry 5857149610527 sched, 0, 9226475966770118659, last_sync_TXG: 6147836, birth_TXG: 6147837, 0, curr_TXG: 6147837 + 0 52275 uberblock_update:return 5857149619433 sched, 0, 1 +*/ +fbt:zfs:uberblock_update:return +{ + printf ("%d %s, %d, %d", timestamp, execname, + pid, args[1]); +} diff --git a/vnodeops.d b/vnodeops.d new file mode 100644 index 0000000..025416b --- /dev/null +++ b/vnodeops.d @@ -0,0 +1,75 @@ +#!/usr/sbin/dtrace -s +/* + * vnodeops.d show latency in microseconds at the VFS layer for the various + * vnode types when reads, writes or fsyncs occur. + * + * This is based on scsilatency.d from the DTrace book, chapter 4. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * Copyright (c) 2013 RackTop Systems, All rights reserved. + * Copyright (c) 2013 Sam Zaydel, All rights reserved. + * + * CDDL HEADER END + * +*/ + #pragma D option quiet + +string vnode_type[uchar_t]; + +dtrace:::BEGIN +{ + /* See /usr/src/uts/common/sys/vnode.h for the full list. */ + vnode_type[0] = "no_type"; + vnode_type[1] = "regular"; + vnode_type[2] = "directory"; + vnode_type[3] = "block_device"; + vnode_type[4] = "character_device"; + vnode_type[5] = "link"; + vnode_type[6] = "fifo"; + vnode_type[7] = "door"; + vnode_type[8] = "procfs"; + vnode_type[9] = "sockfs"; + vnode_type[10] = "event_port"; + vnode_type[11] = "bad_vnode"; + + printf("Tracing... Hit Ctrl-C to end.\n\n"); +} + +fbt:genunix:fop_fsync:entry, +fbt:genunix:fop_read:entry, +fbt:genunix:fop_write:entry { + + self->start = timestamp; + this->type = args[0]->v_type; + self->vnodetype = vnode_type[this->type] != NULL ? vnode_type[this->type] : "NULL"; +} + +fbt:genunix:fop_fsync:return, +fbt:genunix:fop_read:return, +fbt:genunix:fop_write:return +/self->start/ +{ + this->delta = timestamp - self->start; + @latbytype["Time(us):", self->vnodetype, probefunc] = quantize(this->delta / 1000); + self->start = 0; + this->delta = 0; + self->vnodetype = "x"; +} diff --git a/watch-arc-shrink.d b/watch-arc-shrink.d new file mode 100644 index 0000000..dbb33e4 --- /dev/null +++ b/watch-arc-shrink.d @@ -0,0 +1,13 @@ +#!/usr/sbin/dtrace -s + +fbt:zfs:arc_reclaim_needed:return +/args[1]/ +{ + printf("%Y return=%d\n", walltimestamp, args[1]); +} + +fbt:zfs:arc_shrink:entry +{ + printf("%Y\n", walltimestamp); +} + diff --git a/zfs_perds_io_latency.d b/zfs_perds_io_latency.d new file mode 100644 index 0000000..e44341e --- /dev/null +++ b/zfs_perds_io_latency.d @@ -0,0 +1,57 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet + +BEGIN { + + x = 0; + y = 0; + watermark = 100; /* We want to track all IO slower than 50ms. */ + start_d = timestamp; + to_millisec = 1000000; + to_microsec = 1000; + div = "--------------------------------------------------------------------------------"; +} + +dmu_buf_hold_array_by_dnode:entry +/args[0]->dn_objset->os_dsl_dataset && args[3]/ /* Reads */ + +{ + this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); + this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); + /* this->path = strjoin(strjoin(this->parent,"/"),this->ds); */ /* Dirty hack - parent/this format doesn't guarantee full path */ + self->path = strjoin(strjoin(this->parent,"/"),this->ds); + self->uniq = args[0]->dn_objset->os_dsl_dataset->ds_fsid_guid; + self->reads = 1; + ts[ self->path, self->reads, self->uniq ] = timestamp; + +} + +dmu_buf_hold_array_by_dnode:entry +/args[0]->dn_objset->os_dsl_dataset && !args[3]/ /* Writes */ + +{ + this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); + this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); + /* this->path = strjoin(strjoin(this->parent,"/"),this->ds); */ /* Dirty hack - parent/this format doesn't guarantee full path */ + self->path = strjoin(strjoin(this->parent,"/"),this->ds); + self->uniq = args[0]->dn_objset->os_dsl_dataset->ds_fsid_guid; + self->writes = 1; + ts[ self->path, self->writes, self->uniq ] = timestamp; + +} + +dmu_buf_hold_array_by_dnode:return +/ts[ self->path, self->reads, self->uniq ] && args[1] == 0/ +{ + x = (timestamp - ts[ self->path, self->reads, self->uniq ]) / to_microsec; + @["Reads (us)", self->path] = lquantize(x,0,100,10); + self->reads = 0; +} + +dmu_buf_hold_array_by_dnode:return +/ts[ self->path, self->writes, self->uniq ] && args[1] == 0/ +{ + x = (timestamp - ts[ self->path, self->writes, self->uniq ]) / to_microsec; + @["Writes (us)", self->path] = lquantize(x,0,100,10); + self->writes = 0; +} diff --git a/zfs_vdev_iostart.d b/zfs_vdev_iostart.d new file mode 100644 index 0000000..62179b1 --- /dev/null +++ b/zfs_vdev_iostart.d @@ -0,0 +1,52 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option dynvarsize=4M + +BEGIN { + + x = 0; + y = 0; + watermark = 50; /* We want to track all IO slower than 50ms. */ + start_d = walltimestamp; + to_millisec = 1000000; + to_microsec = 1000; + div = "--------------------------------------------------------------------------------"; +} + +fbt:zfs:zio_vdev_io_start:entry +{ + self->io_offset = args[0]->io_offset; + self->io_txg = args[0]->io_txg; + ts[ self->io_offset, self->io_txg ] = timestamp; + self->vtime = vtimestamp; +} + +fbt:zfs:zio_vdev_io_done:entry +/* Measure Latency of I/Os tagged as ZIO_TYPE_READ */ +/self->vtime && ts[ self->io_offset, self->io_txg ] && args[0]->io_spa->spa_name != NULL && args[0]->io_type == ZIO_TYPE_READ/ +{ + this->spa_name = stringof(args[0]->io_spa->spa_name); + this->x = (timestamp - ts[ self->io_offset, self->io_txg ]) / to_microsec; + this->y = (vtimestamp - self->vtime) / to_microsec; + @Realt["Time (us)", this->spa_name, "READ"] = lquantize(this->x,10,100,10); + @CPUt["Time onCPU (us)", this->spa_name, "READ"] = lquantize(this->y,0,50,10); + @rlat["Avg. Time (us)","READ"] = avg(this->x); + @roncpu["Avg. Time onCPU (us)","READ"] = avg(this->y); + ts[ self->io_offset, self->io_txg ] = 0; + self->vtime = 0; +} + +fbt:zfs:zio_vdev_io_done:entry +/* Measure Latency of I/Os tagged as ZIO_TYPE_WRITE */ +/self->vtime && ts[ self->io_offset, self->io_txg ] && args[0]->io_spa->spa_name != NULL && args[0]->io_type == ZIO_TYPE_WRITE/ +{ + this->spa_name = stringof(args[0]->io_spa->spa_name); + this->x = (timestamp - ts[ self->io_offset, self->io_txg ]) / to_microsec; + this->y = (vtimestamp - self->vtime) / to_microsec; + @Realt["Time (us)", this->spa_name, "WRITE"] = lquantize(this->x,10,100,10); + @CPUt["Time onCPU (us)", this->spa_name, "WRITE"] = lquantize(this->y,0,50,10); + @wlat["Avg. Time (us)","WRITE"] = avg(this->x); + @woncpu["Avg. Time onCPU (us)","WRITE"] = avg(this->y); + ts[ self->io_offset, self->io_txg ] = 0; + self->vtime = 0; +} diff --git a/zfs_write_throttle.d b/zfs_write_throttle.d new file mode 100644 index 0000000..8c902d3 --- /dev/null +++ b/zfs_write_throttle.d @@ -0,0 +1,103 @@ +#!/usr/sbin/dtrace -s + +/* + * COPYRIGHT: Copyright (c) 2008 Thomas Bastian. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy at http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * Version: 20080728 + */ + +#pragma D option quiet + +fbt:zfs:spa_sync:entry +{ + self->t = timestamp; + self->pool = args[0]->spa_dsl_pool; + self->txg = args[1]; + self->pname = stringof(self->pool->dp_root_dir->dd_myname); + + @wlimit[self->pname, "Write limit (MB)"] = + lquantize(self->pool->dp_write_limit / 1048576, 250, 8192, 250); + @scnt[self->pname] = count(); +} + +fbt:zfs:spa_sync:return +/self->t/ +{ + @stime[self->pname, "Sync time (ms)"] = + lquantize((timestamp - self->t) / 1000000, 20, 2000, 20); + @bytes[self->pname] = + sum(self->pool->dp_space_towrite[self->txg & 4] / 1048576); + @written[self->pname, "Written (MB)"] = + lquantize(self->pool->dp_space_towrite[self->txg & 4] / 1048576, + 200, 4096, 200); + + self->t = 0; + self->pool = 0; + self->txg = 0; + self->pname = 0; +} + +fbt:zfs:txg_delay:entry +{ + self->d_t = timestamp; + self->d_pname = stringof(args[0]->dp_root_dir->dd_myname); +} + +fbt:zfs:txg_delay:return +/self->d_t && ((timestamp - self->d_t) >= 10000)/ +{ + @txgdelay[self->d_pname] = count(); + self->d_t = 0; + self->d_pname = 0; +} + +fbt:zfs:txg_delay:return +/self->d_t && ((timestamp - self->d_t) < 10000)/ +{ + self->d_t = 0; + self->d_pname = 0; +} + +tick-10s +{ + printf("--- %Y\n", walltimestamp); + normalize(@scnt, 10); + printf(" %-47s %18s\n", "", "Sync rate (/s)"); + printa(" %-47s %@18d\n", @scnt); + printf("\n"); + trunc(@scnt); + + normalize(@bytes, 10); + printf(" %-47s %18s\n", "", "MB/s"); + printa(" %-47s %@18d\n", @bytes); + printf("\n"); + trunc(@bytes); + + normalize(@txgdelay, 10); + printf(" %-47s %18s\n", "", "Delays/s"); + printa(" %-47s %@18d\n", @txgdelay); + printf("\n"); + trunc(@txgdelay); + + printa(@stime); + trunc(@stime); + + printa(@written); + trunc(@written); + + printa(@wlimit); + trunc(@wlimit); +} diff --git a/zfsarcblocksize.d b/zfsarcblocksize.d new file mode 100644 index 0000000..e2744af --- /dev/null +++ b/zfsarcblocksize.d @@ -0,0 +1,47 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +/* +* zfsarcblocksize.d - Script used for high-level observation +* of the ARC size distribution for data and metadata accesses +* quantize function is used to display a distribution plot +* of bytes and of type of data being accessed, i.e. real data +* or Metadata. It may be useful to observe the sizes of blocks +* being retrieved and the sizes of metadata reads vs. data reads +* in the ARC. +*/ + +dtrace:::BEGIN +{ + cnt = 0; +} + +sdt:zfs::arc-hit,sdt:zfs::arc-miss +/ this->b_size = ((arc_buf_hdr_t *)arg0)->b_size / +{ + @b["bytes"] = quantize(this->b_size); + ts = walltimestamp; + cnt++; +} + +sdt:zfs::arc-hit,sdt:zfs::arc-miss +/ this->b_size / +{ /* Determine if this is data or Metadata, if argument is 0, + it is data, else 1 is Metadata */ + /* @t[this->b_type = ((arc_buf_hdr_t *)arg0)->b_type == 0 ? + "Data" : "Metadata"] = quantize(((arc_buf_hdr_t *)arg0)->b_type); => + Attempted to quantize this, but it makes little sense, since there are + only two options, with regard to b_type (0,1) */ + @t[this->b_type = ((arc_buf_hdr_t *)arg0)->b_type == 0 ? + "Data" : "Metadata"] = count(); +} + +tick-1sec +/ this->b_size && cnt > 0 / +{ + printf("\n\n%Y\n", ts); + printa("\tBytes: %@d\n", @b); + printa("\tType (%s): %@d ", @t); + /* printa("\n Type(0->data, 1->metadata): %@d\n", @t); */ + clear(@b); clear(@t); + cnt = 0; /* Reset counter for next round */ +} diff --git a/zfsbiorwsize.d b/zfsbiorwsize.d new file mode 100644 index 0000000..6693a2b --- /dev/null +++ b/zfsbiorwsize.d @@ -0,0 +1,105 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + this->startzfsio = 1; + start_time[arg0] = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + this->path = args[0]->v_path; + this->size = args[1]->uio_resid; +} + + +io:::start +/ this->startzfsio / +{ + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + trig = args[0]->b_bufsize; +} + +io:::done + +/ this->startzfsio && (args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average read, bytes"] = avg(this->size); + @plots["read I/O, us"] = quantize(this->delta); + @avgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +io:::done +/ this->startzfsio && !(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average write, bytes"] = avg(this->size); + @plots["write I/O, us"] = quantize(this->delta); + @avgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +fbt::zfs_read:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @zbsize["average read, bytes"] = avg(this->size); + @zplots["read I/O, us"] = quantize(this->delta); + @zavgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; + this->startzfsio = 0; +} + +fbt::zfs_write:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @zbsize["average write, bytes"] = avg(this->size); + @zplots["write I/O, us"] = quantize(this->delta); + @zavgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; + this->startzfsio = 0; +} + +dtrace:::END +/trig > 0/ +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); + trig = 0; + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @zavgs); + printa("\t%s %@d\n", @zbsize); + printa("\n %s\n%@d\n", @zplots); + trig = 0; +} diff --git a/zfsiosizebyfile.d b/zfsiosizebyfile.d new file mode 100644 index 0000000..838c4c1 --- /dev/null +++ b/zfsiosizebyfile.d @@ -0,0 +1,64 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=4m +#pragma D option switchrate=10hz + +/* This script quantizes IO by path, when first argument given is +a partial path. Normally, expecting to see a partial path to data +on a ZFS dataset. Since `/volumes` is the default path under which +all datasets are mounted, we would expect to see /volumes/poolname... +*/ + +dtrace:::BEGIN +{ + /* printf("%-12s %40s\n", "OP SIZE", "PATH"); */ + printf("Tracing... Hit Ctrl-C to end.\n"); + path = $1; + read = 0; + write = 0; + +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry +/* Match first parameter on command line, +expecting partial path, like /volumes/poolname */ + +/* If we cannot extract path information, we ignore. */ +/args[0]->v_path != NULL && strstr(stringof(args[0]->v_path), path) != NULL/ +{ + read = 1; + self->path = args[0]->v_path; + self->bytes = args[1]->uio_resid; + + @rbytes["read bytes", self->path != NULL ? stringof(self->path) : ""] = quantize(self->bytes); +} + +fbt::zfs_write:entry +/* Match first parameter on command line, +expecting partial path, like /volumes/poolname */ + +/* If we cannot extract path information, we ignore. */ +/args[0]->v_path != NULL && strstr(stringof(args[0]->v_path), path) != NULL/ +{ + write = 1; + self->path = args[0]->v_path; + self->bytes = args[1]->uio_resid; + + @wbytes["write bytes", self->path != NULL ? stringof(self->path) : ""] = quantize(self->bytes); + +} + +dtrace:::END +/read > 0/ +{ + printa("%-16s Path: %s %@d\n", @rbytes); +} + +dtrace:::END +/write > 0/ +{ + printa("%-16s Path: %s %@d\n", @wbytes); +} \ No newline at end of file diff --git a/zfslatency.d b/zfslatency.d new file mode 100644 index 0000000..b66b669 --- /dev/null +++ b/zfslatency.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + rx = 0; wx = 0; +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, +fbt::zfs_write:entry, +fbt::zfs_fsync:entry +{ + self->path = args[0]->v_path; + self->bytes = ((uio_t *)arg1)->uio_resid; + self->start = timestamp; + self->id = self->start+self->bytes; +} + +fbt::zfs_read:return +/ self->bytes != NULL && self->start+self->bytes == self->id / + +{ + this->latency = (timestamp - self->start) / 1000; + this->ops = "read"; + @bytes["bytes"] = sum(self->bytes); + @latency["Dist (bytes/us):", this->ops] = quantize(self->bytes/this->latency); +} + +fbt::zfs_write:return +/ self->bytes != NULL && self->start+self->bytes == self->id / + +{ + this->latency = (timestamp - self->start) / 1000; + this->ops = "write"; + @latency["Dist (bytes/us):", this->ops] = quantize(self->bytes/this->latency); +} + +fbt::zfs_read:return, +fbt::zfs_write:return +/ self->bytes != NULL && self->start+self->bytes == self->id / + +{ + self->start = 0; + self->bytes = 0; + self->id = 0; +} + +tick-30sec +{ + printa("%18s %s %@d\n", @latency); + printa(@bytes); + trunc(@latency); + trunc(@bytes); +} \ No newline at end of file diff --git a/zfsrwtimesize.d b/zfsrwtimesize.d new file mode 100644 index 0000000..e868aed --- /dev/null +++ b/zfsrwtimesize.d @@ -0,0 +1,66 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->ts = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + self->path = args[0]->v_path; + self->size = args[1]->uio_resid; +} + +fbt::zfs_write:return +/ self->ts / +{ + this->type = probefunc == "zfs_write" ? "Write" : "Read"; + this->delta = (timestamp - self->ts); + /* printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(self->path), self->size, this->delta); */ + this->p = self->path == NULL ? "Unknown" : self->path; + @writes[stringof(this->p)] = sum(self->size/(1024)); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["average write, bytes"] = avg(self->size); + @plots["write I/O, us"] = quantize(this->delta/1000); + @avgs["average write I/O, us"] = avg(this->delta/1000); + this->delta = 0; + self->ts = 0; +} + +fbt::zfs_read:return +/ self->ts / +{ + this->type = probefunc == "zfs_write" ? "Write" : "Read"; + this->delta = (timestamp - self->ts); + /* printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(self->path), self->size, this->delta); */ + this->p = self->path == NULL ? "Unknown" : self->path; + @reads[stringof(this->p)] = sum(self->size/(1024)); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["average read, bytes"] = avg(self->size); + @plots["read I/O, us"] = quantize(this->delta/1000); + @avgs["average read I/O, us"] = avg(this->delta/1000); + this->delta = 0; + self->ts = 0; +} + +dtrace:::END +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); + trunc(@reads, 10); trunc(@writes, 10); + printa("[READ] Device: %5s KBytes: %@d\n", @reads); + printa("[WRITE] Device: %5s KBytes: %@d\n", @writes); +} diff --git a/zfsslower.d b/zfsslower.d new file mode 100644 index 0000000..5aec754 --- /dev/null +++ b/zfsslower.d @@ -0,0 +1,81 @@ +#!/usr/sbin/dtrace -s +/* + * zfsslower.d show ZFS I/O taking longer than given ms. + * + * USAGE: zfsslower.d min_ms + * eg, + * zfsslower.d 100 # show I/O at least 100 ms + * + * This is from the DTrace book, chapter 5. It has been enhanced to include + * zfs_readdir() as well, which is shown in the "D" (direction) field as "D". + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + * + * TESTED: this fbt provider based script may only work on some OS versions. + * 121: ok + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + printf("%-20s %-16s %1s %4s %6s %s\n", "TIME", "PROCESS", + "D", "KB", "ms", "FILE"); + min_ns = $1 * 1000000; +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->start = timestamp; +} + +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->start = timestamp; +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +/self->start && (timestamp - self->start) >= min_ns/ +{ + this->iotime = (timestamp - self->start) / 1000000; + this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D"; + printf("%-20Y %-16s %1s %4d %6d %s\n", walltimestamp, + execname, this->dir, self->kb, this->iotime, + self->path != NULL ? stringof(self->path) : ""); +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +{ + self->path = 0; self->kb = 0; self->start = 0; +} \ No newline at end of file diff --git a/zfsslower_q.d b/zfsslower_q.d new file mode 100644 index 0000000..0840823 --- /dev/null +++ b/zfsslower_q.d @@ -0,0 +1,84 @@ +#!/usr/sbin/dtrace -s +/* + * zfsslower.d show ZFS I/O taking longer than given ms. + * + * USAGE: zfsslower.d min_ms + * eg, + * zfsslower.d 100 # show I/O at least 100 ms + * + * This is from the DTrace book, chapter 5. It has been enhanced to include + * zfs_readdir() as well, which is shown in the "D" (direction) field as "D". + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + * + * TESTED: this fbt provider based script may only work on some OS versions. + * 121: ok + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + printf("%-20s %-16s %1s %4s %6s %s\n", "TIME", "PROCESS", + "D", "KB", "ms", "FILE"); + min_ns = $1 * 1000000; +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->start = timestamp; +} + +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->start = timestamp; +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +/self->start && (timestamp - self->start) >= min_ns/ +{ + this->iotime = (timestamp - self->start) / 1000; +/* this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D"; + printf("%-20Y %-16s %1s %4d %6d %s\n", walltimestamp, + execname, this->dir, self->kb, this->iotime, + self->path != NULL ? stringof(self->path) : ""); */ + + @zfsIOlat[ this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D" ] = quantize(this->iotime); +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +{ + self->path = 0; self->kb = 0; self->start = 0; +} \ No newline at end of file diff --git a/zfssnoop.d b/zfssnoop.d new file mode 100644 index 0000000..ab30447 --- /dev/null +++ b/zfssnoop.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10hz + +/* +Collect information about file calls against ZFS +filesystems and at termination aggregate counts +for each call +*/ + +dtrace:::BEGIN +{ + printf("%-21s %6s %6s %-12.12s %-12s %-4s %s\n", "TIMESTAMP", "UID", + "PID", "PROCESS", "CALL", "KB", "PATH"); +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->time = walltimestamp; +} + +fbt::zfs_open:entry +{ + self->path = (*args[0])->v_path; + self->kb = 0; + self->time = walltimestamp; +} + +fbt::zfs_close:entry, fbt::zfs_ioctl:entry, fbt::zfs_getattr:entry, +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->time = walltimestamp; +} + +fbt::zfs_read:entry, fbt::zfs_write:entry, fbt::zfs_open:entry, +fbt::zfs_close:entry, fbt::zfs_ioctl:entry, fbt::zfs_getattr:entry, +fbt::zfs_readdir:entry +{ + @[probefunc,stringof(self->path)] = count(); + printf("%-21Y %6d %6d %-12.12s %-12s %-4d %s\n", self->time, + uid, pid, execname, probefunc, self->kb, + self->path != NULL ? stringof(self->path) : ""); + self->path = 0; self->kb = 0; +} + +dtrace:::END +{ + printf("%s\n", " ... "); + printf("%-12s %-60s %-8s\n", "CALL", "PATH", "COUNT"); + printa("%-12s %-60s %@-8d\n", @); + exit(0); +} diff --git a/ziolatency.d b/ziolatency.d index 6fecb14..4b0b9f7 100644 --- a/ziolatency.d +++ b/ziolatency.d @@ -1,11 +1,7 @@ #!/usr/sbin/dtrace -s #pragma D option quiet -#pragma D option dynvarsize=40m - -/* Description: Trace I/O latency on a per-vdev basis. Script will not be maintained for the foreseeable future. */ -/* Author: Kirill.Davydychev@Nexenta.com */ -/* Copyright 2012, Nexenta Systems, Inc. All rights reserved. */ +#pragma D option dynvarsize=4m dtrace:::BEGIN { @@ -32,40 +28,23 @@ fbt::zio_vdev_io_start:entry } fbt::zio_vdev_io_done:entry -/args[0]->io_type != 0 && start_time[arg0] > 0/ /* && args[0]->io_vd && args[0]->io_vd->vdev_path */ +/args[0]->io_type != 0 && start_time[arg0] > 0 && args[0]->io_vd && args[0]->io_vd->vdev_path/ { this->iotime = (timestamp - start_time[arg0])/1000; + this->path = stringof(args[0]->io_vd->vdev_path); this->zpool = stringof(args[0]->io_spa->spa_name); this->iotype = ziotype[args[0]->io_type]; this->childtype = ziochild[args[0]->io_child_type]; - this->path = args[0]->io_vd ? - args[0]->io_vd->vdev_path ? - stringof(args[0]->io_vd->vdev_path) : - "top_level_vdev" : - "pool"; - - this->vdev_id = args[0]->io_vd ? - args[0]->io_vd->vdev_id : - 404; /* Not Found (pool) */ - this->vdev_pa = args[0]->io_vd ? - args[0]->io_vd->vdev_parent ? - args[0]->io_vd->vdev_parent->vdev_id : - 12455 : /* L2ARC has no parent - set to 12455 (L2ArcSSd) */ - 404; /* Not found */ + this->vdev_id = args[0]->io_vd->vdev_id; /* XXX - Describe abnormal behaviors to watch out for */ - @latency[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = quantize(this->iotime); - @avg_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = avg(this->iotime); - @sum_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = sum(this->iotime); + @latency[this->zpool, this->vdev_id, this->iotype, this->childtype, this->path] = quantize(this->iotime); + @avg_lat[this->zpool, this->vdev_id, this->iotype, this->childtype, this->path] = avg(this->iotime); } dtrace:::END { - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t Latency distribution:%@d\n",@latency); - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t AvgLatency(us): %@d\n",@avg_lat); - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t TotLatency(us): %@d\n",@sum_lat); - trunc(@latency); - trunc(@avg_lat); - trunc(@sum_lat); + printa("ZPool: %s\tChildVdevID: %d\tIOType: %s\tIOChild: %s\tDisk: %s\t Latency distribution(us):%@d\n",@latency); + printa("ZPool: %s\tChildVdevID: %d\tIOType: %s\tIOChild: %s\tDisk: %s\t AvgLatency(us): %@d\n",@avg_lat); }