From acd3d27d266f0e234f1c182413e4b97e1e435021 Mon Sep 17 00:00:00 2001 From: Sam Zaydel Date: Fri, 21 Dec 2012 09:19:25 -0800 Subject: [PATCH 1/4] Adding my own dtrace stuff to forked repo. --- ab_scsi.d | 942 +++++++++++++++++++++++++++++++++++++ arcaccess.d | 23 + cifsops.d | 21 + counter.d | 16 + diskioerr.d | 82 ++++ disklatency.d | 27 ++ disklatency2.d | 38 ++ disklatencygen.d | 27 ++ disklatencyrw.d | 103 ++++ downstack.d | 23 + filemon_zfs.d | 93 ++++ findddt.d | 31 ++ hotfile.d | 98 ++++ io_slower_than_50ms.d | 91 ++++ iofile.d | 79 ++++ iofileb.d | 59 +++ ioinfopervdev.d | 10 + iscsi-inactivity-monitor.d | 84 ++++ iscsiio.d | 61 +++ iscsirwlat.d | 69 +++ iscsisnoop.d | 71 +++ iscsiterr2.d | 231 +++++++++ iscsiwho.d | 33 ++ learn-d.d | 6 + loadavg.d | 17 + metaslab_free.d | 53 +++ nfs-l.d | 252 ++++++++++ nfsrpcattr.d | 31 ++ nfsv3errors.d | 70 +++ nfsv3io.d | 48 ++ nfsv3ops.d | 21 + nfsv3rwsnoop-2.d | 27 ++ processtrace.d | 30 ++ rwtime.d | 12 + rwtimesize.d | 56 +++ scsi.d | 942 +++++++++++++++++++++++++++++++++++++ scsilatency.d | 58 +++ sdretry.d | 23 + sdretry1.d | 23 + spa_sync.d | 48 ++ spa_sync__.d | 41 ++ spacemapalloc_lat.d | 24 + spasync.d | 52 ++ tcp_rtt.d | 33 ++ trace_arc.d | 240 ++++++++++ trussrw.d | 31 ++ txg_monitor.d | 2 +- uberupd.d | 38 ++ watch-arc-shrink.d | 13 + zfs_perds_io_latency.d | 57 +++ zfs_vdev_iostart.d | 52 ++ zfs_write_throttle.d | 103 ++++ zfsarcblocksize.d | 47 ++ zfsbiorwsize.d | 105 +++++ zfsiosizebyfile.d | 64 +++ zfslatency.d | 64 +++ zfsrwtimesize.d | 56 +++ zfsslower.d | 81 ++++ zfsslower_q.d | 84 ++++ zfssnoop.d | 59 +++ ziolatency.d | 37 +- 61 files changed, 5282 insertions(+), 30 deletions(-) create mode 100644 ab_scsi.d create mode 100644 arcaccess.d create mode 100644 cifsops.d create mode 100644 counter.d create mode 100644 diskioerr.d create mode 100644 disklatency.d create mode 100644 disklatency2.d create mode 100644 disklatencygen.d create mode 100644 disklatencyrw.d create mode 100644 downstack.d create mode 100644 filemon_zfs.d create mode 100644 findddt.d create mode 100644 hotfile.d create mode 100644 io_slower_than_50ms.d create mode 100755 iofile.d create mode 100755 iofileb.d create mode 100644 ioinfopervdev.d create mode 100644 iscsi-inactivity-monitor.d create mode 100644 iscsiio.d create mode 100644 iscsirwlat.d create mode 100644 iscsisnoop.d create mode 100644 iscsiterr2.d create mode 100644 iscsiwho.d create mode 100644 learn-d.d create mode 100644 loadavg.d create mode 100644 metaslab_free.d create mode 100644 nfs-l.d create mode 100644 nfsrpcattr.d create mode 100644 nfsv3errors.d create mode 100644 nfsv3io.d create mode 100644 nfsv3ops.d create mode 100644 nfsv3rwsnoop-2.d create mode 100644 processtrace.d create mode 100644 rwtime.d create mode 100644 rwtimesize.d create mode 100644 scsi.d create mode 100644 scsilatency.d create mode 100644 sdretry.d create mode 100644 sdretry1.d create mode 100644 spa_sync.d create mode 100644 spa_sync__.d create mode 100644 spacemapalloc_lat.d create mode 100644 spasync.d create mode 100644 tcp_rtt.d create mode 100644 trace_arc.d create mode 100644 trussrw.d create mode 100644 uberupd.d create mode 100644 watch-arc-shrink.d create mode 100644 zfs_perds_io_latency.d create mode 100644 zfs_vdev_iostart.d create mode 100644 zfs_write_throttle.d create mode 100644 zfsarcblocksize.d create mode 100644 zfsbiorwsize.d create mode 100644 zfsiosizebyfile.d create mode 100644 zfslatency.d create mode 100644 zfsrwtimesize.d create mode 100644 zfsslower.d create mode 100644 zfsslower_q.d create mode 100644 zfssnoop.d diff --git a/ab_scsi.d b/ab_scsi.d new file mode 100644 index 0000000..b31a210 --- /dev/null +++ b/ab_scsi.d @@ -0,0 +1,942 @@ +#!/usr/sbin/dtrace -qCs + +/* + * Chris.Gerhard@sun.com + * Joel.Buckley@sun.com + */ + +#pragma ident "@(#)scsi.d 1.18 09/03/20 SMI" + +/* + * SCSI logging via dtrace. + * + * See http://blogs.sun.com/chrisg/tags/scsi.d + * + * Usage: + * + * scsi.d [ @T time ][ hba [ X ]] + * + * With no arguments it logs every scsi packet out and as many returns + * as it can find. + * + * So to trace all the io going via instance 2 qus driver do: + * + * scsi.d qus 2 + * + * To do the same trace for 20 seconds do: + * + * scsi.d @T 20 qus 2 + * + * NOTE: the "@" is used instead of the more traditional "-" as dtrace will + * continue to parse arguments so would see "-T" as an argument to dtrace and + * produce a usage error. + * + * Temporarily, ie I intend to change this in the future, scsi.d is also + * taking some more arguments using the -D options as well. I suspect I will + * get rid of the bizarre @T options above and change to using -D completely + * but that will be some point in the future, if at all. + * + * The new options are: + * + * -D EXECNAME='"foo"' + * Which results scsi.d only reporting IO associated with the application "foo". + * -D PRINT_STACK + * Which results in scsi.d printing a kernel stack trace after every outgoing + * packet. + * -D QUIET + * Which results in none of the packets being printed. Kind of pointless + * without another option. + * -D PERF_REPORT + * Which results in a report of how long IOs took aggregated per HBA useful + * with -D QUIET to get performance statistics. + * -D TARGET_STATS + * aggregate the stats based on the target. + * -D LUN_STATS + * aggregate the stats based on the LUN. Requires TARGET_STATS + * -D DYNVARSIZE + * pass this value to the #pragma D option dynvarsize= option. + * -D HBA + * the name of the HBA we are interested in. + * -D MIN_LBA + * Only report logical blocks over this value + * -D MAX_LBA + * Only IOs to report logical blocks that are less than this value. + * -D REPORT_OVERTIME=N + * Only report IOs that have taken longer than this number of nanoseconds. + * This only stops the printing of the packets not the collection of + * statistics. + * There are some tuning options that take effect only when + * REPORT_OVERTIME is set. These are: + * -D NSPEC=N + * Set the number of speculations to this value. + * -D SPECSIZE=N + * Set the size of the speculaton buffer. This should be 200 * + * the size of NSPEC. + * -D CLEANRATE=N + * Specify the clean rate. + * + * Finally scsi.d will also now accept the dtrace -c and -p options to trace + * just the commands or process given. + * + * Since dtrace does not output in real time it is useful to sort the output + * of the script using sort -n to get the entries in chronological order. + * + * NOTE: This does not directly trace what goes onto the scsi bus or fibre, + * to do so would require this script have knowledge of every HBA that could + * ever be connected to a system. It traces the SCSI packets as they are + * passed from the target driver to the HBA in the SCSA layer and then back + * again. Although to get the packet when it is returned it guesses that the + * packet will be destroyed using scsi_destroy_pkt and not modified before it + * is. So far this has worked but there is no garauntee that it will work for + * all HBAs and target drivers in the future. + * + */ + +/* + * This can be used as a framework to report on any rountine that has + * a scsi_pkt passed into it. Search for "FRAMEWORK" to see where you + * have to add bits. + */ +#pragma D option quiet +#pragma D option defaultargs + +#ifdef DYNVARSIZE +#pragma D option dynvarsize=DYNVARSIZE +#endif + +#ifdef REPORT_OVERTIME + +#ifdef NSPEC +#pragma D option nspec=NSPEC +#else +#pragma D option nspec=1000 +#endif /* NSPEC */ + + +#ifdef SPECSIZE +#pragma D option specsize=SPECSIZE +#else +#pragma D option specsize=200000 +#endif /* SPECSIZE */ + +#ifdef CLEANRATE +#pragma D option cleanrate=CLEANRATE +#endif /* CLEANRATE */ + +#endif /* REPORT_OVERTIME */ + +/* + * Have to include scsi_types.h so that in releases where scsi_address + * contains a union and #defines for a_target, a_lun & a_sublun they get + * picked up. + */ +#include + +/* + * Now work around CR 6798038. Depending on how that CR is fixed this + * workaround may have to be revisited so that the script will work in + * all cases. + */ +#ifdef a_lun +#undef a_lun +#define my_alun a.spi.a_lun +#else +#define my_alun a_lun +#endif + +#ifdef a_target +#undef a_target +#define my_atarget a.spi.a_target +#else +#define my_atarget a_target +#endif + +/* + * Just for completeness. The script does not use sublun so these are not + * needed but do no harm. + */ +#ifdef a_sublun +#undef a_sublun +#define my_asublun a.spi.a_sublun +#else +#define my_asublun a_sublun +#endif + +/* + * Set both lba & len to 64Bit values, to avoid type mismatch later... + * + * The following does not function due to DTrace insufficient registers... + * + * #define INT64(A, B, C, D, E, F, G, H) ( \ + * (((A) & 0x0ffL) << 56) | \ + * (((B) & 0x0ffL) << 48) | \ + * (((C) & 0x0ffL) << 40) | \ + * (((D) & 0x0ffL) << 32) | \ + * (((E) & 0x0ffL) << 24) | \ + * (((F) & 0x0ffL) << 16) | \ + * (((G) & 0x0ffL) << 8) | \ + * ((H) & 0x0ffL)) + * + * Instead, calculate lower & upper values, then pass them into this macro: + */ +#define INT64(A, B) ( \ + (((A) & 0x0ffffffffLL) << 32) | \ + ((B) & 0x0ffffffffL)) + +#define INT32(A, B, C, D) ( \ + (((A) & 0x0ffL) << 24) | \ + (((B) & 0x0ffL) << 16) | \ + (((C) & 0x0ffL) << 8) | \ + ((D) & 0x0ffL)) +#define INT16(A, B) ( \ + (((A) & 0x0ffL) << 8) | \ + ((B) & 0x0ffL)) +#define INT8(A) ( \ + ((A) & 0x0ffL)) + +/* #define A_TO_TRAN(ap) ((ap)->a_hba_tran) */ +#define P_TO_TRAN(pkt) ((pkt)->pkt_address.a_hba_tran) +/* #define P_TO_ADDR(pkt) (&((pkt)->pkt_address)) */ +#define P_TO_DEVINFO(pkt) ((struct dev_info *)(P_TO_TRAN(pkt)->tran_hba_dip)) + +#define DEV_NAME(pkt) \ + stringof(`devnamesp[P_TO_DEVINFO(pkt)->devi_major].dn_name) /* ` */ + +#define DEV_INST(pkt) (P_TO_DEVINFO(pkt)->devi_instance) + +#ifdef MIN_BLOCK +#define MIN_TEST && this->lba >= (MIN_BLOCK) +#else +#define MIN_TEST +#endif +#ifdef MAX_BLOCK +#define MAX_TEST && (this->lba <= ((MAX_BLOCK) + this->len)) +#else +#define MAX_TEST +#endif + +#define ARGUMENT_TEST (this->pkt && (hba_set == 0 || \ + hba == DEV_NAME(this->pkt) && \ + (inst_set == 0 || inst == DEV_INST(this->pkt))) MIN_TEST MAX_TEST) + +/* + * PRINT_DEV_FROM_PKT + * + * From a scsi_pkt get the name of the driver and the instance of the + * hba. + */ +#define PRINT_DEV_FROM_PKT(pkt) \ + printf("%s%d:", DEV_NAME(pkt), DEV_INST(pkt)) + +#define PRINT_TIMESTAMP() printf("%5.5d.%9.9d ", \ + (timestamp - script_start_time)/1000000000, \ + (timestamp - script_start_time)%1000000000); + +/* + * Look away now. + * + * This is just hackery to get around me being so stubborn I + * don't want to wrap this in a shell script. + */ +#ifdef HBA +BEGIN { + hba = HBA; + hba_set = 1; + timeout = 0; + inst_set = 0; + inst = 0; +} +#else + +#define TIMEOUT_SELECT "@T" + +#define DO_ARGS(N, NS, M, MS) \ +BEGIN \ +/ hba_set == 0 && NS != "" && NS != TIMEOUT_SELECT / \ +{ \ + hba = NS; \ + hba_set = 1; \ +} \ +BEGIN \ +/ hba_set == 1 && inst_set == 0 && MS != "" / \ +{ \ + inst = M; \ + inst_set = 1; \ +} \ + +BEGIN +/ $$1 == TIMEOUT_SELECT / +{ + timeout = $2; +} + +#endif + +BEGIN +/ $$target != "" / +{ + target = $target; +} + +BEGIN +{ +#ifdef EXECNAME + scsi_d_target_execname = EXECNAME; +#else + scsi_d_target_execname = ""; +#endif +} +#ifndef HBA +DO_ARGS($1, $$1, $2, $$2) +DO_ARGS($3, $$3, $4, $$4) +#endif + +/* + * You can open your eyes again now. + */ + +BEGIN { +#ifdef REPORT_OVERTIME + printf("Only reporting IOs longer than %dns\n", REPORT_OVERTIME); +#endif + printf("Hit Control C to interrupt\n"); + script_start_time = timestamp; + end_time = timestamp + (timeout * 1000000000); + + scsi_ops[0x000, 0x0] = "TEST_UNIT_READY"; + scsi_ops[0x001, 0x0] = "REZERO_UNIT_or_REWIND"; + scsi_ops[0x003, 0x0] = "REQUEST_SENSE"; + scsi_ops[0x004, 0x0] = "FORMAT_UNIT"; + scsi_ops[0x005, 0x0] = "READ_BLOCK_LIMITS"; + scsi_ops[0x006, 0x0] = "Unknown(06)"; + scsi_ops[0x007, 0x0] = "REASSIGN_BLOCKS"; + scsi_ops[0x008, 0x0] = "READ(6)"; + scsi_ops[0x009, 0x0] = "Unknown(09)"; + scsi_ops[0x00a, 0x0] = "WRITE(6)"; + scsi_ops[0x00b, 0x0] = "SEEK(6)"; + scsi_ops[0x00c, 0x0] = "Unknown(0c)"; + scsi_ops[0x00d, 0x0] = "Unknown(0d)"; + scsi_ops[0x00e, 0x0] = "Unknown(0e)"; + scsi_ops[0x00f, 0x0] = "READ_REVERSE"; + scsi_ops[0x010, 0x0] = "WRITE_FILEMARK"; + scsi_ops[0x011, 0x0] = "SPACE"; + scsi_ops[0x012, 0x0] = "INQUIRY"; + scsi_ops[0x013, 0x0] = "VERIFY"; + scsi_ops[0x014, 0x0] = "Unknown(14)"; + scsi_ops[0x015, 0x0] = "MODE_SELECT(6)"; + scsi_ops[0x016, 0x0] = "RESERVE(6)"; + scsi_ops[0x017, 0x0] = "RELEASE(6)"; + scsi_ops[0x018, 0x0] = "COPY"; + scsi_ops[0x019, 0x0] = "ERASE(6)"; + scsi_ops[0x01a, 0x0] = "MODE_SENSE(6)"; + scsi_ops[0x01b, 0x0] = "START_STOP_UNIT"; + scsi_ops[0x01c, 0x0] = "RECIEVE_DIAGNOSTIC_RESULTS"; + scsi_ops[0x01d, 0x0] = "SEND_DIAGNOSTIC"; + scsi_ops[0x01e, 0x0] = "PREVENT_ALLOW_MEDIUM_REMOVAL"; + scsi_ops[0x01f, 0x0] = "Unknown(1f)"; + scsi_ops[0x020, 0x0] = "Unknown(20)"; + scsi_ops[0x021, 0x0] = "Unknown(21)"; + scsi_ops[0x022, 0x0] = "Unknown(22)"; + scsi_ops[0x023, 0x0] = "READ_FORMAT_CAPACITY"; + scsi_ops[0x024, 0x0] = "Unknown(24)"; + scsi_ops[0x025, 0x0] = "READ_CAPACITY(10)"; + scsi_ops[0x026, 0x0] = "Unknown(26)"; + scsi_ops[0x027, 0x0] = "Unknown(27)"; + scsi_ops[0x028, 0x0] = "READ(10)"; + scsi_ops[0x02a, 0x0] = "WRITE(10)"; + scsi_ops[0x02b, 0x0] = "SEEK(10)_or_LOCATE(10)"; + scsi_ops[0x02e, 0x0] = "WRITE_AND_VERIFY(10)"; + scsi_ops[0x02f, 0x0] = "VERIFY(10)"; + scsi_ops[0x030, 0x0] = "SEARCH_DATA_HIGH"; + scsi_ops[0x031, 0x0] = "SEARCH_DATA_EQUAL"; + scsi_ops[0x032, 0x0] = "SEARCH_DATA_LOW"; + scsi_ops[0x033, 0x0] = "SET_LIMITS(10)"; + scsi_ops[0x034, 0x0] = "PRE-FETCH(10)"; + scsi_ops[0x035, 0x0] = "SYNCHRONIZE_CACHE(10)"; + scsi_ops[0x036, 0x0] = "LOCK_UNLOCK_CACHE(10)"; + scsi_ops[0x037, 0x0] = "READ_DEFECT_DATA(10)"; + scsi_ops[0x039, 0x0] = "COMPARE"; + scsi_ops[0x03a, 0x0] = "COPY_AND_WRITE"; + scsi_ops[0x03b, 0x0] = "WRITE_BUFFER"; + scsi_ops[0x03c, 0x0] = "READ_BUFFER"; + scsi_ops[0x03e, 0x0] = "READ_LONG"; + scsi_ops[0x03f, 0x0] = "WRITE_LONG"; + scsi_ops[0x040, 0x0] = "CHANGE_DEFINITION"; + scsi_ops[0x041, 0x0] = "WRITE_SAME(10)"; + scsi_ops[0x04c, 0x0] = "LOG_SELECT"; + scsi_ops[0x04d, 0x0] = "LOG_SENSE"; + scsi_ops[0x050, 0x0] = "XDWRITE(10)"; + scsi_ops[0x051, 0x0] = "XPWRITE(10)"; + scsi_ops[0x052, 0x0] = "XDREAD(10)"; + scsi_ops[0x053, 0x0] = "XDWRITEREAD(10)"; + scsi_ops[0x055, 0x0] = "MODE_SELECT(10)"; + scsi_ops[0x056, 0x0] = "RESERVE(10)"; + scsi_ops[0x057, 0x0] = "RELEASE(10)"; + scsi_ops[0x05a, 0x0] = "MODE_SENSE(10)"; + scsi_ops[0x05e, 0x0] = "PERSISTENT_RESERVE_IN"; + scsi_ops[0x05f, 0x0] = "PERSISTENT_RESERVE_OUT"; + scsi_ops[0x07f, 0x0] = "Variable_Length_CDB"; + scsi_ops[0x07f, 0x3] = "XDREAD(32)"; + scsi_ops[0x07f, 0x4] = "XDWRITE(32)"; + scsi_ops[0x07f, 0x6] = "XPWRITE(32)"; + scsi_ops[0x07f, 0x7] = "XDWRITEREAD(32)"; + scsi_ops[0x07f, 0x9] = "READ(32)"; + scsi_ops[0x07f, 0xb] = "WRITE(32)"; + scsi_ops[0x07f, 0xa] = "VERIFY(32)"; + scsi_ops[0x07f, 0xc] = "WRITE_AND_VERIFY(32)"; + scsi_ops[0x080, 0x0] = "XDWRITE_EXTENDED(16)"; + scsi_ops[0x081, 0x0] = "REBUILD(16)"; + scsi_ops[0x082, 0x0] = "REGENERATE(16)"; + scsi_ops[0x083, 0x0] = "EXTENDED_COPY"; + scsi_ops[0x086, 0x0] = "ACCESS_CONTROL_IN"; + scsi_ops[0x087, 0x0] = "ACCESS_CONTROL_OUT"; + scsi_ops[0x088, 0x0] = "READ(16)"; + scsi_ops[0x08a, 0x0] = "WRITE(16)"; + scsi_ops[0x08c, 0x0] = "READ_ATTRIBUTES"; + scsi_ops[0x08d, 0x0] = "WRITE_ATTRIBUTES"; + scsi_ops[0x08e, 0x0] = "WRITE_AND_VERIFY(16)"; + scsi_ops[0x08f, 0x0] = "VERIFY(16)"; + scsi_ops[0x090, 0x0] = "PRE-FETCH(16)"; + scsi_ops[0x091, 0x0] = "SYNCHRONIZE_CACHE(16)"; + scsi_ops[0x092, 0x0] = "LOCK_UNLOCK_CACHE(16)_or_LOCATE(16)"; + scsi_ops[0x093, 0x0] = "WRITE_SAME(16)_or_ERASE(16)"; + scsi_ops[0x09e, 0x0] = "SERVICE_IN_or_READ_CAPACITY(16)"; + scsi_ops[0x0a0, 0x0] = "REPORT_LUNS"; + scsi_ops[0x0a3, 0x0] = "MAINTENANCE_IN_or_REPORT_TARGET_PORT_GROUPS"; + scsi_ops[0x0a4, 0x0] = "MAINTENANCE_OUT_or_SET_TARGET_PORT_GROUPS"; + scsi_ops[0x0a7, 0x0] = "MOVE_MEDIUM"; + scsi_ops[0x0a8, 0x0] = "READ(12)"; + scsi_ops[0x0aa, 0x0] = "WRITE(12)"; + scsi_ops[0x0ae, 0x0] = "WRITE_AND_VERIFY(12)"; + scsi_ops[0x0af, 0x0] = "VERIFY(12)"; + scsi_ops[0x0b3, 0x0] = "SET_LIMITS(12)"; + scsi_ops[0x0b4, 0x0] = "READ_ELEMENT_STATUS"; + scsi_ops[0x0b7, 0x0] = "READ_DEFECT_DATA(12)"; + scsi_ops[0x0ba, 0x0] = "REDUNDANCY_GROUP_IN"; + scsi_ops[0x0bb, 0x0] = "REDUNDANCY_GROUP_OUT"; + scsi_ops[0x0bc, 0x0] = "SPARE_IN"; + scsi_ops[0x0bd, 0x0] = "SPARE_OUT"; + scsi_ops[0x0be, 0x0] = "VOLUME_SET_IN"; + scsi_ops[0x0bf, 0x0] = "VOLUME_SET_OUT"; + scsi_ops[0x0d0, 0x0] = "EXPLICIT_LUN_FAILOVER"; + scsi_ops[0x0f1, 0x0] = "STOREDGE_CONTROLLER"; + + scsi_reasons[0] = "COMPLETED"; + scsi_reasons[1] = "INCOMPLETE"; + scsi_reasons[2] = "DMA_ERR"; + scsi_reasons[3] = "TRAN_ERR"; + scsi_reasons[4] = "RESET"; + scsi_reasons[5] = "ABORTED"; + scsi_reasons[6] = "TIMEOUT"; + scsi_reasons[7] = "DATA_OVERRUN"; + scsi_reasons[8] = "COMMAND_OVERRUN"; + scsi_reasons[9] = "STATUS_OVERRUN"; + scsi_reasons[10] = "Bad_Message"; + scsi_reasons[11] = "No_Message_Out"; + scsi_reasons[12] = "XID_Failed"; + scsi_reasons[13] = "IDE_Failed"; + scsi_reasons[14] = "Abort_Failed"; + scsi_reasons[15] = "Reject_Failed"; + scsi_reasons[16] = "Nop_Failed"; + scsi_reasons[17] = "Message_Parity_Error_Failed"; + scsi_reasons[18] = "Bus_Device_Reset_Failed"; + scsi_reasons[19] = "Identify_Message_Rejected"; + scsi_reasons[20] = "Unexpected_Bus_free"; + scsi_reasons[21] = "Tag_Rejected"; + scsi_reasons[22] = "TERMINATED"; + scsi_reasons[24] = "Device_Gone"; + + scsi_state[0] = "Success"; + scsi_state[0x2] = "Check_Condition"; + scsi_state[0x4] = "Condition_Met"; + scsi_state[0x8] = "Busy"; + scsi_state[0x10] = "Intermidiate"; + scsi_state[0x14] = "Intermidiate-Condition_Met"; + scsi_state[0x18] = "Reservation_Conflict"; + scsi_state[0x22] = "Command_Terminated"; + scsi_state[0x28] = "Queue_Full"; + scsi_state[0x30] = "ACA_Active"; + + +/* FRAMEWORK:- add your special string to this array */ + names[0] = "->"; /* Command sent */ + names[1] = "<-"; /* Command returning */ + names[2] = "ILL"; /* Illegal Request spotted */ +} + +fbt:scsi:scsi_transport:entry, +fbt:scsi:scsi_destroy_pkt:entry +/ timeout != 0 && end_time < timestamp / +{ + exit(0); +} + +/* + * relying on scsi_destroy_pkt to get the response is a + * hack that may or may not always work depending on HBA + * and target drivers. + * With arc case: PSARC/2009/033 I can use scsi_hba_pkt_comp(). When that + * is out in the wild it should be used. + */ +/* + * If in any of these probes this->pkt is initialized to zero + * the knock on effect is none of the other probes for this clause further + * down get run. + * + * Do not just comment the probes out. Clause local variables are not + * initialized by default so you will see run time dtrace errors if you do + * where other variables do not get set. + */ + +#define PROBE_SWITCH \ + ((target == 0 || pid == target ) && \ + (scsi_d_target_execname == "" || scsi_d_target_execname == execname)) + +fbt:scsi:scsi_transport:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:scsi:scsi_transport:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +fbt:scsi:scsi_destroy_pkt:entry +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +/* + * FRAMEWORK:- create your entry probe and make this->pkt point to the + * argument that has the scsi_pkt. eg add one like this: + * + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:*sd:*sd_sense_key_illegal_request:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg3; +} + +/* FRAMEWORK:- Add your probe name to the list for CDB_PROBES */ +#define CDB_PROBES \ +fbt:scsi:scsi_transport:entry, \ +fbt:*sd:*sd_sense_key_illegal_request:entry + +#define ENTRY_PROBES \ +CDB_PROBES, \ +fbt:scsi:scsi_destroy_pkt:entry + +ENTRY_PROBES +/ this->pkt / +{ + this->cdb = (uchar_t *)this->pkt->pkt_cdbp; + this->scb = (uchar_t *)this->pkt->pkt_scbp; + this->group = ((this->cdb[0] & 0xe0) >> 5); + this->lbalen = 0; +} +/* + * 5.11 allows this->name = "->" but vanilla 5.10 does not so I'm sticking + * to the setting a variable and then choosing the string from the global + * array. + */ +fbt:scsi:scsi_transport:entry +/ this->pkt / +{ + start_time[this->scb] = timestamp; + this->name = 0; +} + +/* + * If we are filtering on target pid or execname then only report + * the return packets that we know we sent. + */ +fbt:scsi:scsi_destroy_pkt:entry +/ start_time[this->scb] == 0 && ( target != 0 || scsi_d_target_execname != "") / +{ + this->pkt = 0; +} + +fbt:scsi:scsi_destroy_pkt:entry +/ this->pkt / +{ + this->name = 1; +} +/* + * FRAMEWORK: Add your probe and set this->name to the offset of + * your string in the names table. + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ this->pkt / +{ + this->name = 2; +} + +/* + * Now cope with the differnent CDB layouts based on the group. + * + * Trying to use x = this->group == 0 ? y : z; results in D running out + * of registers so it is all done using seperate probes. + * + * Group Listing: + * + Group 0: 6Byte CDBs: scsi_ops[0x000] thru scsi_ops[0x01f] + * + Group 1: 10Byte CDBs: scsi_ops[0x020] thru scsi_ops[0x03f] + * + Group 2: 10Byte CDBs: scsi_ops[0x040] thru scsi_ops[0x05f] + * + Group 3: Variable Length CDBs: scsi_ops[0x060] thru scsi_ops[0x07f] + * + Group 4: 16Byte CDBs: scsi_ops[0x080] thru scsi_ops[0x09f] + * + Group 5: 12Byte CDBs: scsi_ops[0x0a0] thru scsi_ops[0x0bf] + * Group 6: Vendor Specific CDBs: scsi_ops[0x0c0] thru scsi_ops[0x0df] + * Group 7: Vendor Specific CDBs: scsi_ops[0x0e0] thru scsi_ops[0x0ff] + * + * The groups with a leading plus sign "+" are of importance. + */ +ENTRY_PROBES +/ this->pkt && this->group == 0 / +{ + this->lba = INT32(0, (this->cdb[1] & 0x1f), + this->cdb[2], this->cdb[3]); + this->lbalen = 6; + this->len = INT8(this->cdb[4]); + this->control = this->cdb[5]; + this->sa = 0; + this->cdblen = 6; +} + +ENTRY_PROBES +/ this->pkt && this->group == 1 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 2 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 3 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[12], this->cdb[13], + this->cdb[14], this->cdb[15]); + this->lbaLower = INT32(this->cdb[16], this->cdb[17], + this->cdb[18], this->cdb[19]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[28], this->cdb[29], + this->cdb[30], this->cdb[31]); + this->control = this->cdb[1]; + this->sa = INT16(this->cdb[8], this->cdb[9]); + this->cdblen = 32 +} + +ENTRY_PROBES +/ this->pkt && this->group == 4 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbaLower = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[10], this->cdb[11], + this->cdb[12], this->cdb[13]); + this->control = this->cdb[15]; + this->sa = 0; + this->cdblen = 16; +} + +ENTRY_PROBES +/ this->pkt && this->group == 5 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->control = this->cdb[11]; + this->sa = 0; + this->cdblen = 12; +} +/* + * We don't know the format of the group 6 and 7 commands as they are + * vendor specific. So I just print the cdb up to a maximum of 32 bytes. + */ +ENTRY_PROBES +/ this->pkt && this->group > 5 / +{ + this->lba = 0; + this->lbalen = 0; + this->len = 0; + this->control = 0; + this->sa = 0; +/* + * At some point pkt_cdblen will make it into 5.10 but I can't at this moment + * workout how I can test this before the script compiles (without wrapping + * this in a ksh script which as I have mentioned above I'm not going to do). + */ +#ifndef __SunOS_5_10 + this->cdblen = this->pkt->pkt_cdblen; +#else + this->cdblen = 32; +#endif + +} + +/* + * The guts of the script. Report what we have if we are required. + * First the stuff we do for both outgoing and incoming. + */ + +ENTRY_PROBES +/ !ARGUMENT_TEST / +{ + this->arg_test_passed = 0; +} + +ENTRY_PROBES +/ ARGUMENT_TEST / +{ + this->arg_test_passed = 1; +} + +/* + * If only reporting IO that takes more then a certan time then use + * speculations to capture the outgoing data and then only commit the + * speculation if the packet takes too long. + */ + +#ifdef REPORT_OVERTIME + +#define SPECULATE speculate(specs[this->scb]); + +ENTRY_PROBES +/ this->arg_test_passed && specs[this->scb] == 0 / +{ + specs[this->scb] = speculation(); +} + +#else + +#define SPECULATE + +#endif /* REPORT_OVERTIME */ + +#ifndef QUIET +ENTRY_PROBES +/ this->arg_test_passed / +{ + SPECULATE + PRINT_TIMESTAMP(); + PRINT_DEV_FROM_PKT(this->pkt); + printf("%s 0x%2.2x %9s address %2.2d:%2.2d, lba 0x%*.*x, ", + names[this->name], + this->cdb[0], + scsi_ops[(uint_t)this->cdb[0], this->sa] != 0 ? + scsi_ops[(uint_t)this->cdb[0], this->sa] : "Unknown", + this->pkt->pkt_address.my_atarget, + this->pkt->pkt_address.my_alun, + this->lbalen, this->lbalen, + this->lba); + printf("len 0x%6.6x, control 0x%2.2x timeout %d CDBP %p", + this->len, this->control, + this->pkt->pkt_time, this->cdb); +} + +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(" %d %s(%d)", this->arg_test_passed, execname, pid); +} + +/* + * For those who are just not happy without some raw hex numbers. Print the + * raw cdb. + */ +CDB_PROBES +/ this->arg_test_passed == 1 && this->cdblen / +{ + SPECULATE + printf(" cdb(%d) ", this->cdblen); +} + +/* + * Now print each byte out of the cdb. + */ + +#define PRINT_CDB(N) \ +CDB_PROBES \ +/ this->arg_test_passed == 1 && this->cdblen > N/ \ +{ \ + SPECULATE; \ + printf("%2.2x", this->cdb[N]) \ +} +#else /* QUIET */ +#define PRINT_CDB(N) +#endif /* QUIET */ + +PRINT_CDB(0) +PRINT_CDB(1) +PRINT_CDB(2) +PRINT_CDB(3) +PRINT_CDB(4) +PRINT_CDB(5) +PRINT_CDB(6) +PRINT_CDB(7) +PRINT_CDB(8) +PRINT_CDB(9) +PRINT_CDB(10) +PRINT_CDB(11) +PRINT_CDB(12) +PRINT_CDB(13) +PRINT_CDB(14) +PRINT_CDB(15) +PRINT_CDB(16) +PRINT_CDB(17) +PRINT_CDB(18) +PRINT_CDB(19) +PRINT_CDB(20) +PRINT_CDB(21) +PRINT_CDB(22) +PRINT_CDB(23) +PRINT_CDB(24) +PRINT_CDB(25) +PRINT_CDB(26) +PRINT_CDB(27) +PRINT_CDB(28) +PRINT_CDB(29) +PRINT_CDB(30) +PRINT_CDB(31) + +/* + * Now the result on the incoming. + */ +#ifdef PERF_REPORT +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && start_time[this->scb] != 0 / +{ + @[DEV_NAME(this->pkt), DEV_INST(this->pkt) +#ifdef TARGET_STATS + , this->pkt->pkt_address.a_target +#ifdef LUN_STATS + , this->pkt->pkt_address.a_lun +#endif +#endif + ] = quantize(timestamp - start_time[this->scb]); +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + this->state = *(this->scb); + this->state = this->state & 0x3E; +} +#ifndef QUIET +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(", reason 0x%x (%s) pkt_state 0x%x state 0x%x %s Time %dus\n", + this->pkt->pkt_reason, + scsi_reasons[this->pkt->pkt_reason] != 0 ? + scsi_reasons[this->pkt->pkt_reason] : "Unknown", + this->pkt->pkt_state, this->state, scsi_state[this->state] != 0 ? + scsi_state[this->state] : "Unknown", + start_time[this->scb] != 0 ? + (timestamp - start_time[this->scb])/1000 : 0); +} +#endif + +#ifndef QUIET +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf("\n"); +} +#endif +/* + * printing stacks of where we are called from can be useful however it + * does mean you can't just pipe the output through sort(1). Does not work + * with overtime as you can't put a stack into a speculation. + */ +#ifdef PRINT_STACK +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + stack(10); +} +#endif + +#ifdef REPORT_OVERTIME +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +((timestamp - start_time[this->scb]) > REPORT_OVERTIME && start_time[this->scb]) / +{ + SPECULATE + commit(specs[this->scb]); + specs[this->scb] = 0; +} +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +! ((timestamp - start_time[this->scb]) > REPORT_OVERTIME) / +{ + discard(specs[this->scb]); + specs[this->scb] = 0; +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + start_time[this->scb] = 0; +} + diff --git a/arcaccess.d b/arcaccess.d new file mode 100644 index 0000000..709d921 --- /dev/null +++ b/arcaccess.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("lbolt rate is %d Hertz.\n", `hz); + printf("Tracing lbolts between ARC accesses..."); +} + +fbt::arc_access:entry +{ + self->ab = args[0]; + self->lbolt = args[0]->b_arc_access; +} + +fbt::arc_access:return +/self->lbolt/ +{ + @ = quantize(self->ab->b_arc_access - self->lbolt); + self->ab = 0; + self->lbolt = 0; +} \ No newline at end of file diff --git a/cifsops.d b/cifsops.d new file mode 100644 index 0000000..5356e76 --- /dev/null +++ b/cifsops.d @@ -0,0 +1,21 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + trace("Tracing CIFS operations... Interval 5 secs.\n"); +} + +smb:::op-* +{ + @ops[args[0]->ci_remote, probename] = count(); +} + +profile:::tick-5sec, +dtrace:::END +{ + printf("\n %-32s %-30s %8s\n", "Client", "Operation", "Count"); + printa(" %-32s %-30s %@8d\n", @ops); + trunc(@ops); +} diff --git a/counter.d b/counter.d new file mode 100644 index 0000000..f0bdd3c --- /dev/null +++ b/counter.d @@ -0,0 +1,16 @@ +/* +* Count off and report the number of seconds elapsed +*/ +dtrace:::BEGIN + { + i = 0; + } +profile:::tick-1sec + { + i = i + 1; + trace(i); + } +dtrace:::END + { + trace(i); + } \ No newline at end of file diff --git a/diskioerr.d b/diskioerr.d new file mode 100644 index 0000000..8436c66 --- /dev/null +++ b/diskioerr.d @@ -0,0 +1,82 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=8m +#pragma D option switchrate=10hz +/* + Copyright 2012 Sam Zaydel - RackTop Systems + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +This script is intended to track IO where some buffer error was registered, +by looking for the B_ERROR flag in the bufinfo_t struct. +*/ + + + +dtrace:::BEGIN { + printf("Tracing... Hit Ctrl-C to end.\n"); +} +io:::start { + start_time[arg0] = timestamp; + fire = 1; +} +io:::done / (args[0]->b_flags & B_READ) && (args[0]->b_flags & B_ERROR) && (this->start = start_time[arg0]) / { + + @errors[args[1]->dev_pathname, "Error Code", args[0]->b_error] = count(); + this->delta = (timestamp - this->start) / 1000; +/* @plots["Read I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Read I/O, us"] = avg(this->delta); + @avgbytes["Avg Read bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Read bytes/us"] = avg(args[0]->b_bcount / this->delta); +*/ + start_time[arg0] = 0; + error = 1; +} + +io:::done / !(args[0]->b_flags & B_READ) && (args[0]->b_flags & B_ERROR) && (this->start = start_time[arg0]) / { + + @errors[args[1]->dev_pathname, "Error Code",args[0]->b_error] = count(); + this->delta = (timestamp - this->start) / 1000; +/* @plots["Write I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Write I/O, us"] = avg(this->delta); + @avgbytes["Avg Write bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Write bytes/us"] = avg(args[0]->b_bcount / this->delta); +*/ + start_time[arg0] = 0; + self->error = 1; + +} + +profile:::tick-5sec / error == 1 / { + printa("%s %s %d %@d\n", @errors); trunc(@errors); +/* printf("%Y\n", walltimestamp); + printa("%s %s %s :: %s\n (%d,%d), us:\n%@d\n", @plots); + printa("%18s %44@d\n", @avgs); + printa("%18s %44@d\n", @avgbytes); + printa("%18s %44@d\n", @perio_t); + printf("%s\n",""); + trunc(@avgs); trunc(@avgbytes); trunc(@perio_t); +*/ + error = 0; +} diff --git a/disklatency.d b/disklatency.d new file mode 100644 index 0000000..46169f6 --- /dev/null +++ b/disklatency.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @[args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor] = + quantize(this->delta); + start_time[arg0] = 0; +} + +dtrace:::END +{ + printa(" %s (%d,%d), us:\n%@d\n", @); +} \ No newline at end of file diff --git a/disklatency2.d b/disklatency2.d new file mode 100644 index 0000000..54e75f4 --- /dev/null +++ b/disklatency2.d @@ -0,0 +1,38 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start + +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @[execname, args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + start_time[arg0] = 0; +} + +profile:::tick-10sec + +{ + printa("%s %s :: %s\n (%d,%d), us:\n%@d\n ", @); + printf("%Y\n", walltimestamp); +} + +/*dtrace:::END +{ + printa("%s %s :: %s\n (%d,%d), us:\n%@d\n ", @); +} +*/ diff --git a/disklatencygen.d b/disklatencygen.d new file mode 100644 index 0000000..fe0ff50 --- /dev/null +++ b/disklatencygen.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; +} + +io:::done +/this->start = start_time[arg0]/ +{ + this->delta = (timestamp - this->start) / 1000; + @avgs[args[1]->dev_pathname,args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor] = + avg(this->delta); + start_time[arg0] = 0; +} + +tick-1sec +{ + printa("%s\t%s\t%d\t%d\t%@d\n", @avgs); +} diff --git a/disklatencyrw.d b/disklatencyrw.d new file mode 100644 index 0000000..35438f7 --- /dev/null +++ b/disklatencyrw.d @@ -0,0 +1,103 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=4m +#pragma D option switchrate=10hz +/* + Copyright 2012 Sam Zaydel - RackTop Systems + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Script will collect per device IO statistics, and will display a distribution +graph similar to one below, along with statistics printed at the bottom. This is a +per-device output. + +Read I/O, us sched sd4 :: /devices/pci@0,0/pci15ad,1976@10/sd@3,0:a + (292,256), us: + + value ------------- Distribution ------------- count + 8 | 0 + 16 | 6 + 32 |@ 10 + 64 | 0 + 128 |@@ 23 + 256 |@ 8 + 512 | 0 + 1024 | 0 + 2048 |@ 16 + 4096 |@ 10 + 8192 |@@@@@@@@ 97 + 16384 |@@@@@@@@@@@@ 151 + 32768 |@@@@@@@@@@ 125 + 65536 |@@@ 39 + 131072 | 4 + 262144 | 2 + 524288 | 0 + 1048576 | 2 + 2097152 | 0 + + Avg Write I/O, us 890 + Avg Read I/O, us 31407 + Avg Write bytes 8068 + Avg Read bytes 131072 + Avg Read bytes/us 6 +Avg Write bytes/us 7 + +*/ + +dtrace:::BEGIN { + printf("Tracing... Hit Ctrl-C to end.\n"); +} +io:::start { + start_time[arg0] = timestamp; + fire = 1; +} +io:::done / (args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / { + this->delta = (timestamp - this->start) / 1000; + @plots["Read I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Read I/O, us"] = avg(this->delta); + @avgbytes["Avg Read bytes"] = avg(args[0]->b_bcount); + @perio_t["Avg Read bytes/us"] = avg(args[0]->b_bcount / this->delta); + start_time[arg0] = 0; +} + +io:::done / !(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / { + this->delta = (timestamp - this->start) / 1000; + @plots["Write I/O, us", + execname, + args[1]->dev_statname, + args[1]->dev_pathname, + args[1]->dev_major, + args[1]->dev_minor] = quantize(this->delta); + @avgs["Avg Write I/O, us"] = avg(this->delta); + @avgbytes["Avg Write bytes"] = avg(args[0]->b_bcount); + /* self->w_lat = args[0]->b_bcount / self->delta; */ + @perio_t["Avg Write bytes/us"] = avg(args[0]->b_bcount / this->delta); + start_time[arg0] = 0; +} + +profile:::tick-5sec / fire > 0 / { + printf("%Y\n", walltimestamp); + printa("%s %s %s :: %s\n (%d,%d), us:\n%@d\n", @plots); + printa("%18s %44@d\n", @avgs); + printa("%18s %44@d\n", @avgbytes); + printa("%18s %44@d\n", @perio_t); + printf("%s\n",""); + trunc(@avgs); trunc(@avgbytes); trunc(@perio_t); + fire = 0; +} diff --git a/downstack.d b/downstack.d new file mode 100644 index 0000000..28737e7 --- /dev/null +++ b/downstack.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -FCs + +$1:entry +{ + self->trace = 1; +} + +$1:return +{ + printf("Returns 0x%llx", arg1); + self->trace = 0; +} + +::entry +/self->trace == 1/ +{ +} + +::return +/self->trace == 1/ +{ + printf("Returns 0x%llx", arg1); +} diff --git a/filemon_zfs.d b/filemon_zfs.d new file mode 100644 index 0000000..b8c1831 --- /dev/null +++ b/filemon_zfs.d @@ -0,0 +1,93 @@ +#!/usr/sbin/dtrace -s + +/* + * + * filemon_zfs.d - Monitors specific file access + * Written using DTrace. + * + * + * $Id: filemon_zfs.d 1 2010-03-12 14:16:26Z sergio $ + * + * USAGE: filemon_zfs.d + * + * eg, + * ./filemon_zfs.d /var/adm/messages # Monitor access to /var/adm/messages + * + * Must be root or with DTrace role privilege + * + * NOTES: This script uses dtrace so it should work on Solaris or OpenSolaris + * + * THANKS: The students of a DTrace course for the idea + * + * COPYRIGHT: Copyright (c) 2008 Sergio Rodriguez de Guzman Martinez + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * Author: Sergio Rodriguez de Guzman [Madrid, Spain] + * + * 12-03-2010 Sergio Rodriguez de Guzman Created this. + * + * + */ + +#pragma D option quiet + +BEGIN +{ + printf ("%20s%20s%8s%10s%10s\n", "DATE", "CMD", "R/W/D", "USER", "PID"); +} + +zfs_read:entry, +zfs_getpage:entry +{ + self->filepath = args[0]->v_path; +} + +zfs_write:entry, +zfs_putpage:entry +{ + self->filepath = args[0]->v_path; +} + +zfs_write:return, +zfs_putpage:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + printf("%20Y%20s%8s%10d%10d\n", + walltimestamp, execname, "W", uid, pid); + self->filepath = 0; +} + +zfs_read:return, +zfs_getpage:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + printf("%20Y%20s%8s%10d%10d\n", + walltimestamp, execname, "R", uid, pid); + self->filepath = 0; +} + +zfs_remove:entry +{ + self->filepath = strjoin( stringof(args[0]->v_path), "/" ); + self->filepath = strjoin( self->filepath, stringof(args[1]) ); +} + +zfs_remove:return +/ strstr(stringof(self->filepath), $1) != NULL / +{ + printf("%20Y%20s%8s%10d%10d\n", + walltimestamp, execname, "D", uid, pid); +} diff --git a/findddt.d b/findddt.d new file mode 100644 index 0000000..a729d6b --- /dev/null +++ b/findddt.d @@ -0,0 +1,31 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +BEGIN +{ +fpath = $1 +} + +syscall::read:entry,syscall::write:entry +/ fds[arg0].fi_pathname == fpath / +{ +@fs[execname, fds[arg0].fi_pathname] = count(); +self->ts = timestamp; + } + +::ddt*:entry, ::zio_ddt*:entry +/self->ts && probefunc != "ddt_prefetch"/ +{ +@st[probefunc, stack()] = count(); +printf("%s %Y\n", probefunc, walltimestamp); + +self->ts = 0; +} + +END +{ +printa("Command: %-20s Count: %-@8d\nPath: %s", @fs); +trunc(@st,5); +exit(0); +} diff --git a/hotfile.d b/hotfile.d new file mode 100644 index 0000000..df6e582 --- /dev/null +++ b/hotfile.d @@ -0,0 +1,98 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); + line = "-----------"; + x = 0; + hotcount = 10; +} + +nfsv3:::op-read-start, +nfsv3:::op-write-start + +{ + this->filepath = args[1]->noi_curpath; + start[args[1]->noi_xid] = timestamp; + active_nfs = 1; + x = 1; +} + +io:::start +/active_nfs != 0/ + +{ + this->bytes = args[0]->b_bcount; + /* @bytesize[args[2]->fi_pathname,pid, curpsinfo->pr_psargs] = quantize(this->bytes); */ + @totalb[args[0]->b_flags & B_READ ? "[READ]" : "[WRITE]", args[2]->fi_pathname] = sum(this->bytes); +} + +nfsv3:::op-read-done, +nfsv3:::op-write-done +/start[args[1]->noi_xid] != 0/ + +{ + this->elapsed = timestamp - start[args[1]->noi_xid]; + /* this->filepath = args[1]->noi_curpath; */ + @nfs_rwtime[this->filepath, probename == "op-read-done" ? "Read time(us)" : "Write time(us)"] = + quantize(this->elapsed / 1000); + /* @host[args[0]->ci_remote] = sum(this->elapsed); */ + @rwlatency[args[0]->ci_remote, this->filepath, + probename == "op-read-done" ? "Read time" : "Write time"] = sum(this->elapsed); + + start[args[1]->noi_xid] = 0; + active_nfs = 0; +} + +nfsv3:::op-read-done +{ + this->bytes = args[2]->res_u.ok.data.data_len; + @nfsopsbypath["[READ]", this->filepath] = count(); + /* Collect sum of bytes for each client IP address */ + @nfsreadbytes[args[0]->ci_remote, "[READ]"] = sum(this->bytes); + @busyfiles["[READ]", this->filepath] = sum(this->bytes); +} + + +nfsv3:::op-write-done +{ + this->bytes = args[2]->res_u.ok.count; + @nfsopsbypath["[WRITE]", this->filepath] = count(); + /* Collect sum of bytes for each client IP address */ + @nfswritebytes[args[0]->ci_remote, "[WRITE]"] = sum(this->bytes); + @busyfiles["[WRITE]", this->filepath] = sum(this->bytes); +} + +tick-1sec +/ x != 0 / +{ + printa("Client IP: %-16s NFS I/O: %-8s %@d bytes\n", @nfsreadbytes); + printa("Client IP: %-16s NFS I/O: %-8s %@d bytes\n", @nfswritebytes); + printa(" Physical I/O: %s %@d bytes\n", @totalb); + normalize(@rwlatency,1000000); trunc(@rwlatency,5); trunc(@nfs_rwtime,5); + printa("Client IP: %-16s Path: %s\n%s (ms) %@d\n", @rwlatency); printa(@nfs_rwtime); + trunc(@nfs_rwtime); trunc(@rwlatency); + trunc(@nfsreadbytes); trunc(@nfswritebytes); trunc(@totalb); + x = 0; +} + +dtrace:::END +{ + /* trunc(@bytesize,5); + trunc(@totalb,5); + printf("\n%-60s %-8s %s\n", "Pathname", "PID", "CMD"); + printa("%-60s %-8d %S\n%@d\n", @bytesize); + printa("----------\nPath: %s\nBytes Total: %@d\n----------\n",@totalb); */ + /* Summarize total operations and number of bytes read and written + for each file, organized by path, and trimmed to number of files + equal to value of hotcount var. */ + printf("%s\n\n","---------------------- NFSv3 Read/Write top files Summary ----------------------"); + trunc(@busyfiles, hotcount); + trunc(@nfsopsbypath, hotcount); + + printa("%s Path: %s %@d bytes, Operations: %@d\n", @busyfiles,@nfsopsbypath); + /* normalize(@file, 1000); + printa(@file); */ +} diff --git a/io_slower_than_50ms.d b/io_slower_than_50ms.d new file mode 100644 index 0000000..8086413 --- /dev/null +++ b/io_slower_than_50ms.d @@ -0,0 +1,91 @@ +#!/usr/sbin/dtrace -s +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 RackTop Systems, All rights reserved. + * + * Script should help to pinpoint disks which are seemingly slower than + * most, i.e. we can collect top 5 or in this case top 10 slowest to respond + * disks on a system. + * io_slower_than_50ms.d: show Physical I/O events taking longer than 50 ms. + * + * USAGE: io_slower_than_50ms.d + * + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +#pragma D option quiet + +BEGIN { + x = 0; + y = 0; + watermark = 50; /* We want to track all IO slower than 50ms. */ + start_d = timestamp; + to_millisec = 1000000; + div = "--------------------------------------------------------------------------------"; +} + +io::bdev_strategy:start +{ + st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] = timestamp; + blk_addr = args[0]->b_addr; +} + +io:::done +/ st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] / +{ + x = (timestamp - st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr]) / to_millisec; + @[args[0]->b_bcount/0x1000] = lquantize(x,20,80,10); +} + +io:::done +/* We should arrive here eveery time we hip IO that passes the watermark +and we increment counter as well as add event to aggregation. */ +/ x > watermark / +{ + y += 1; + @slower_than_wm[args[1]->dev_pathname] = count(); +} + +io:::done +/ st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] / +{ + st[args[0]->b_edev, args[0]->b_blkno, args[0]->b_addr] = 0; + blk_addr = 0; + x = 0; +} + +END +{ + stop_d = (timestamp - start_d) / to_millisec; + trunc(@slower_than_wm,10); + printa("\t time(ms)\t\t\t\t blocksize (KB): %d %24@d\n",@); + printf("%s\n",div); + printf("Top 10 slow disks by count of IO events longer than %d (ms)\n",watermark); + printf("%s\n",div); + printa("Number of slow IO events: %@d device: %s\n",@slower_than_wm); + printf("%s\n",div); + printf("Counted a total of %d events slower than %d (ms) in last %d seconds\n",y,watermark,stop_d/1000); +} diff --git a/iofile.d b/iofile.d new file mode 100755 index 0000000..255057a --- /dev/null +++ b/iofile.d @@ -0,0 +1,79 @@ +#!/usr/sbin/dtrace -s +/* + * iofile.d - I/O wait time by filename and process. + * Written using DTrace (Solaris 10 3/05). + * + * This prints the total I/O wait times for each filename by process. + * This can help determine why an application is performing poorly by + * identifying which file they are waiting on, and the total times. + * Both disk and NFS I/O are measured. + * + * $Id: iofile.d 3 2007-08-01 10:50:08Z brendan $ + * + * USAGE: iofile.d # wait, then hit Ctrl-C to end + * + * FIELDS: + * PID Process ID + * CMD Process name + * TIME Total wait time for disk events, us + * FILE File pathname + * + * BASED ON: /usr/demo/dtrace/iocpu.d + * + * SEE ALSO: iosnoop, iotop + * + * PORTIONS: Copyright (c) 2005, 2006 Brendan Gregg. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * 24-Jul-2005 Brendan Gregg Created this. + * 20-Apr-2006 " " Last update. + */ + +#pragma D option quiet + +/* print header */ +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +/* save time at start */ +io:::wait-start +{ + self->start = timestamp; +} + +/* process event */ +io:::wait-done +/self->start/ +{ + /* + * wait-done is used as we are measing wait times. It also + * is triggered when the correct thread is on the CPU, obviating + * the need to link process details to the start event. + */ + this->elapsed = timestamp - self->start; + @files[pid, execname, args[2]->fi_pathname] = sum(this->elapsed); + self->start = 0; +} + +/* print report */ +dtrace:::END +{ + normalize(@files, 1000); + printf("%6s %-12s %8s %s\n", "PID", "CMD", "TIME", "FILE"); + printa("%6d %-12.12s %@8d %s\n", @files); +} diff --git a/iofileb.d b/iofileb.d new file mode 100755 index 0000000..e7572f3 --- /dev/null +++ b/iofileb.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -s +/* + * iofileb.d - I/O bytes by filename and process. + * Written using DTrace (Solaris 10 3/05). + * + * This prints a summary of requested disk activity by pathname, + * providing totals of the I/O events in bytes. It is a companion to the + * iofile.d script - which prints in terms of I/O wait time, not bytes. + * I/O wait time is a better metric for understanding performance issues. + * Both disk and NFS I/O are measured. + * + * $Id: iofileb.d 3 2007-08-01 10:50:08Z brendan $ + * + * USAGE: iofileb.d # wait several seconds, then hit Ctrl-C + * + * FIELDS: + * PID process ID + * CMD command name + * KB Kilobytes of disk I/O + * FILE Full pathname of the file + * + * COPYRIGHT: Copyright (c) 2006 Brendan Gregg. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at Docs/cddl1.txt + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * 20-Feb-2006 Brendan Gregg Created this. + * 20-Feb-2006 " " Last update. + */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + @files[pid, execname, args[2]->fi_pathname] = sum(args[0]->b_bcount); +} + +dtrace:::END +{ + normalize(@files, 1024); + printf("%6s %-12s %6s %s\n", "PID", "CMD", "KB", "FILE"); + printa("%6d %-12.12s %@6d %s\n", @files); +} diff --git a/ioinfopervdev.d b/ioinfopervdev.d new file mode 100644 index 0000000..86168d4 --- /dev/null +++ b/ioinfopervdev.d @@ -0,0 +1,10 @@ +zio_vdev_io_start:entry +{ + printf("vdev_bytes_statistics: NULL: %d\tREAD: %d\tWRITE: %d\tFREE: %d\tCLAIM: %d\tIOCTL: %d\n", + args[0]->io_vd->vdev_stat.vs_bytes[0], /* NULL */ + args[0]->io_vd->vdev_stat.vs_bytes[1], /* READ */ + args[0]->io_vd->vdev_stat.vs_bytes[2], /* WRITE */ + args[0]->io_vd->vdev_stat.vs_bytes[3], /* FREE */ + args[0]->io_vd->vdev_stat.vs_bytes[4], /* CLAIM */ + args[0]->io_vd->vdev_stat.vs_bytes[5]); /* IOCTL */ +} diff --git a/iscsi-inactivity-monitor.d b/iscsi-inactivity-monitor.d new file mode 100644 index 0000000..1177846 --- /dev/null +++ b/iscsi-inactivity-monitor.d @@ -0,0 +1,84 @@ +#!/usr/sbin/dtrace -Cs + +/* + * iscsi-inactivity-monitor.d + * + * This dtrace script looks for the lack of iscsi target traffic and + * will log the non-event by timestamp. + * + * Sample output: + ** iSCSI target inactivity monitor + ** Inactivity threshold 11 seconds + ** Start time 2010 Dec 22 16:41:54 + ** No incoming iSCSI traffic since 2010 Dec 22 16:41:54, current time 2010 Dec 22 16:42:06 + ** No outgoing iSCSI traffic since 2010 Dec 22 16:41:54, current time 2010 Dec 22 16:42:06 + ** iSCSI inactivity monitor, 856 total transfers at 2010 Dec 22 17:46:37 + * + * Copyright 2010, Nexenta Systems, Inc. All rights reserved. + */ + +#pragma D option quiet + +inline int THRESHOLD = 11000000000; /* nanoseconds */ + +BEGIN { + n = 0; + start_active = 0; /* current activity flag */ + start_ts = timestamp + THRESHOLD; + start_ws = walltimestamp; + done_active = 0; + done_ts = start_ts; + done_ws = start_ws; + printf("iSCSI target inactivity monitor\n"); + printf("Inactivity threshold %d seconds\n", THRESHOLD/1000000000); + printf("Start time %Y\n", start_ws); +} + +iscsi:::xfer-start +{ + n++; + start_ts = timestamp + THRESHOLD; + start_ws = walltimestamp; +} +iscsi:::xfer-start +/!start_active/ +{ + start_active = 1; + printf("Incoming traffic at %Y\n", walltimestamp); +} + +profile:::tick-1sec +/timestamp > start_ts && start_active/ +{ + printf("No incoming iSCSI traffic since %Y, current time %Y\n", start_ws, walltimestamp); + start_active = 0; +} + +iscsi:::xfer-done +{ + done_ts = timestamp + THRESHOLD; + done_ws = walltimestamp; +} +iscsi:::xfer-done +/!done_active/ +{ + done_active = 1; + printf("Outgoing traffic at %Y\n", walltimestamp); +} + +profile:::tick-1sec +/timestamp > done_ts && done_active/ +{ + printf("No outgoing iSCSI traffic since %Y, current time %Y\n", done_ws, walltimestamp); + done_active = 0; +} + +profile:::tick-1hour +{ + printf("iSCSI target inactivity monitor status at %Y\n", walltimestamp); + printf("\tTotal transfers = %d\n", n); + printf("\tIncoming flow %s\n", start_active ? "active" : "inactive"); + printf("\tLast incoming transfer at %Y\n", start_ws); + printf("\tOutgoing flow %s\n", done_active ? "active" : "inactive"); + printf("\tLast outgoing transfer at %Y\n", done_ws); +} \ No newline at end of file diff --git a/iscsiio.d b/iscsiio.d new file mode 100644 index 0000000..332e2b0 --- /dev/null +++ b/iscsiio.d @@ -0,0 +1,61 @@ +#!/usr/sbin/dtrace -s +/* + * iscsiio.d - Report iSCSI I/O. Solaris Nevada, DTrace. + * + * This traces requested iSCSI data I/O events when run on an iSCSI server. + * The output reports iSCSI read and write I/O while this script was running. + * + * USAGE: iscsiio.d # Hit Ctrl-C to end + * + * FIELDS: + * REMOTE IP IP address of the client + * EVENT Data I/O event (read/write) + * COUNT Number of I/O events + * Kbytes Total data Kbytes transferred + * KB/sec Average data Kbytes/sec transferred + */ + +#pragma ident "@(#)iscsiio.d 1.2 07/03/27 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); + start = timestamp; +} + +iscsi*:::data-send +{ + @num[args[0]->ci_remote, "read"] = count(); + @bytes[args[0]->ci_remote, "read"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "read"] = sum(args[1]->ii_datalen); +} + +iscsi*:::data-receive +{ + @num[args[0]->ci_remote, "write"] = count(); + @bytes[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x0a || args[2]->ic_cdb[0] == 0x2a/ +{ + /* + * scsi-command writes also move data. Their codes are in + * /usr/include/sys/scsi/generic/commands.h . + */ + @num[args[0]->ci_remote, "write"] = count(); + @bytes[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); + @rate[args[0]->ci_remote, "write"] = sum(args[1]->ii_datalen); +} + +dtrace:::END +{ + normalize(@rate, (timestamp - start) * 1024 / 1000000000); + printf(" %-26s %8s %8s %10s %10s\n", "REMOTE IP", "EVENT", "COUNT", + "Kbytes", "KB/sec"); + normalize(@bytes, 1024); + printa(" %-26s %8s %@8d %@10d %@10d\n", @num, @bytes, @rate); +} diff --git a/iscsirwlat.d b/iscsirwlat.d new file mode 100644 index 0000000..5c3f8a3 --- /dev/null +++ b/iscsirwlat.d @@ -0,0 +1,69 @@ +#!/usr/sbin/dtrace -s +/* + * iscsirwlat.d - Report iSCSI Read/Write Latency. Solaris Nevada, DTrace. + * + * This traces iSCSI data I/O events when run on an iSCSI server, and + * produces a report of read/write latency in microseconds. + * + * USAGE: iscsirwlat.d # Hit Ctrl-C to end + * + * FIELDS: + * EVENT Data I/O event (data-send/data-receive) + * REMOTE IP IP address of the client + * COUNT Number of I/O events + * Kbytes Total data Kbytes transferred + * + * NOTE: If you try to sum the read times, they will sometimes add to + * a time longer than the sample time - the reason is that reads can + * occur in parallel, and so suming them together will overcount. + */ + +#pragma ident "@(#)iscsirwlat.d 1.3 07/03/28 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x08 || args[2]->ic_cdb[0] == 0x28/ +{ + /* + * self-> variables can't be used, as one thread receives the + * scsi command while another receives the reads. + */ + start_read[args[1]->ii_itt, args[1]->ii_initiator] = timestamp; +} + +iscsi*:::scsi-command +/args[2]->ic_cdb[0] == 0x0a || args[2]->ic_cdb[0] == 0x2a/ +{ + start_write[args[1]->ii_itt, args[1]->ii_initiator] = timestamp; +} + +iscsi*:::data-send +/start_read[args[1]->ii_itt, args[1]->ii_initiator] && + (args[1]->ii_flags & ISCSI_FLAG_FINAL)/ +{ + @read[args[0]->ci_remote] = quantize(timestamp - + start_read[args[1]->ii_itt, args[1]->ii_initiator]); + start_read[args[1]->ii_ttt, args[1]->ii_initiator] = 0; +} + +iscsi*:::scsi-response +/start_write[args[1]->ii_itt, args[1]->ii_initiator]/ +{ + @write[args[0]->ci_remote] = quantize(timestamp - + start_write[args[1]->ii_itt, args[1]->ii_initiator]); + start_write[args[1]->ii_itt, args[1]->ii_initiator] = 0; +} + +dtrace:::END +{ + printf("Read Latency (ns),\n"); + printa(@read); + printf("Write Latency (ns),\n"); + printa(@write); +} diff --git a/iscsisnoop.d b/iscsisnoop.d new file mode 100644 index 0000000..7d8412f --- /dev/null +++ b/iscsisnoop.d @@ -0,0 +1,71 @@ +#!/usr/sbin/dtrace -s +/* + * iscsisnoop.d - Snoop iSCSI events. Solaris Nevada, DTrace. + * + * This snoops iSCSI events when run on an iSCSI server. + * + * USAGE: iscsisnoop.d # Hit Ctrl-C to end + * + * FIELDS: + * CPU CPU event occured on + * REMOTE IP IP address of the client + * EVENT Data I/O event (data-send/data-receive) + * BYTES Data bytes + * ITT Initiator task tag + * SCSIOP SCSI opcode as a description, as hex, or '-' + * + * NOTE: On multi-CPU servers output may not be in correct time order + * (shuffled). A change in the CPU column is a hint that this happened. + * If this is a problem, print an extra timestamp field and post sort. + */ + +#pragma ident "@(#)iscsisnoop.d 1.2 07/03/27 SMI" + +#pragma D option quiet +#pragma D option switchrate=10 + +dtrace:::BEGIN +{ + printf("%3s %-26s %-14s %6s %10s %6s\n", "CPU", "REMOTE IP", + "EVENT", "BYTES", "ITT", "SCSIOP"); + + /* + * SCSI opcode to string translation hash. This is from + * /usr/include/sys/scsi/generic/commands.h. If you would + * rather all hex, comment this out. + */ + scsiop[0x08] = "read"; + scsiop[0x0a] = "write"; + scsiop[0x0b] = "seek"; + scsiop[0x28] = "read(10)"; + scsiop[0x2a] = "write(10)"; + scsiop[0x2b] = "seek(10)"; +} + +iscsi*:::data-*, +iscsi*:::login-*, +iscsi*:::logout-*, +iscsi*:::nop-*, +iscsi*:::task-*, +iscsi*:::async-*, +iscsi*:::scsi-response +{ + printf("%3d %-26s %-14s %6d %10d -\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt); +} + +iscsi*:::scsi-command +/scsiop[args[2]->ic_cdb[0]] != NULL/ +{ + printf("%3d %-26s %-14s %6d %10d %s\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt, + scsiop[args[2]->ic_cdb[0]]); +} + +iscsi*:::scsi-command +/scsiop[args[2]->ic_cdb[0]] == NULL/ +{ + printf("%3d %-26s %-14s %6d %10d 0x%x\n", cpu, args[0]->ci_remote, + probename, args[1]->ii_datalen, args[1]->ii_itt, + args[2]->ic_cdb[0]); +} diff --git a/iscsiterr2.d b/iscsiterr2.d new file mode 100644 index 0000000..d9f892c --- /dev/null +++ b/iscsiterr2.d @@ -0,0 +1,231 @@ +#!/usr/sbin/dtrace -Cs + +#pragma D option quiet +#pragma D option switchrate=10hz + +typedef enum idm_status { + IDM_STATUS_SUCCESS = 0, + IDM_STATUS_FAIL, + IDM_STATUS_NORESOURCES, + IDM_STATUS_REJECT, + IDM_STATUS_IO, + IDM_STATUS_ABORTED, + IDM_STATUS_SUSPENDED, + IDM_STATUS_HEADER_DIGEST, + IDM_STATUS_DATA_DIGEST, + IDM_STATUS_PROTOCOL_ERROR, + IDM_STATUS_LOGIN_FAIL +} idm_status_t; + +dtrace:::BEGIN +{ + /* + * The following was generated from the SCSI_CMDS_KEY_STRINGS + * definitions in /usr/include/sys/scsi/generic/commands.h using sed. + * Additional codes gathered from http://en.wikipedia.org/wiki/SCSI_command + */ + scsi_cmd[0x00] = "test_unit_ready"; + scsi_cmd[0x01] = "rezero/rewind"; + scsi_cmd[0x03] = "request_sense"; + scsi_cmd[0x04] = "format"; + scsi_cmd[0x05] = "read_block_limits"; + scsi_cmd[0x07] = "reassign"; + scsi_cmd[0x08] = "read"; + scsi_cmd[0x0a] = "write"; + scsi_cmd[0x0b] = "seek"; + scsi_cmd[0x0f] = "read_reverse"; + scsi_cmd[0x10] = "write_file_mark"; + scsi_cmd[0x11] = "space"; + scsi_cmd[0x12] = "inquiry"; + scsi_cmd[0x13] = "verify"; + scsi_cmd[0x14] = "recover_buffer_data"; + scsi_cmd[0x15] = "mode_select"; + scsi_cmd[0x16] = "reserve"; + scsi_cmd[0x17] = "release"; + scsi_cmd[0x18] = "copy"; + scsi_cmd[0x19] = "erase_tape"; + scsi_cmd[0x1a] = "mode_sense"; + scsi_cmd[0x1b] = "load/start/stop"; + scsi_cmd[0x1c] = "get_diagnostic_results"; + scsi_cmd[0x1d] = "send_diagnostic_command"; + scsi_cmd[0x1e] = "door_lock"; + scsi_cmd[0x23] = "read_format_capacity"; + scsi_cmd[0x24] = "set_window"; + scsi_cmd[0x25] = "read_capacity"; + scsi_cmd[0x28] = "read(10)"; + scsi_cmd[0x29] = "read_generation"; + scsi_cmd[0x2a] = "write(10)"; + scsi_cmd[0x2b] = "seek(10)"; + scsi_cmd[0x2c] = "erase(10)"; + scsi_cmd[0x2d] = "read_updated_block"; + scsi_cmd[0x2e] = "write_verify"; + scsi_cmd[0x2f] = "verify(10)"; + scsi_cmd[0x30] = "search_data_high"; + scsi_cmd[0x31] = "search_data_equal"; + scsi_cmd[0x32] = "search_data_low"; + scsi_cmd[0x33] = "set_limits"; + scsi_cmd[0x34] = "read_position"; + scsi_cmd[0x35] = "synchronize_cache"; + scsi_cmd[0x36] = "lock_unlock_cache"; + scsi_cmd[0x37] = "read_defect_data"; + scsi_cmd[0x38] = "medium_scan"; + scsi_cmd[0x39] = "compare"; + scsi_cmd[0x3a] = "copy_verify"; + scsi_cmd[0x3b] = "write_buffer"; + scsi_cmd[0x3c] = "read_buffer"; + scsi_cmd[0x3d] = "update_block"; + scsi_cmd[0x3e] = "read_long"; + scsi_cmd[0x3f] = "write_long"; + scsi_cmd[0x40] = "change_definition"; + scsi_cmd[0x41] = "write_same(10)"; + scsi_cmd[0x44] = "report_densities/read_header"; + scsi_cmd[0x45] = "play_audio(10)"; + scsi_cmd[0x46] = "get_configuration"; + scsi_cmd[0x47] = "play_audio_msf"; + scsi_cmd[0x4a] = "get_event_status_notification"; + scsi_cmd[0x4b] = "pause_resume"; + scsi_cmd[0x4c] = "log_select"; + scsi_cmd[0x4d] = "log_sense"; + scsi_cmd[0x50] = "xdwrite(10)"; + scsi_cmd[0x51] = "xpwrite(10)"; + scsi_cmd[0x52] = "xdread(10)"; + scsi_cmd[0x53] = "xdwriteread(10)"; + scsi_cmd[0x54] = "send_opc_information"; + scsi_cmd[0x55] = "mode_select(10)"; + scsi_cmd[0x56] = "reserve(10)"; + scsi_cmd[0x57] = "release(10)"; + scsi_cmd[0x58] = "repair track"; + scsi_cmd[0x5a] = "mode_sense(10)"; + scsi_cmd[0x5b] = "close_track/session"; + scsi_cmd[0x5c] = "read_buffer_capacity"; + scsi_cmd[0x5d] = "send_cue_sheet"; + scsi_cmd[0x5e] = "persistent_reserve_in"; + scsi_cmd[0x5f] = "persistent_reserve_out"; + scsi_cmd[0x7e] = "extended_cdb"; + scsi_cmd[0x7f] = "variable_length_cdb"; + scsi_cmd[0x80] = "write_file_mark(16)"; + scsi_cmd[0x81] = "read_reverse(16)"; + scsi_cmd[0x82] = "regenerate(16)"; + scsi_cmd[0x83] = "extended_copy"; + scsi_cmd[0x84] = "receive_copy_results"; + scsi_cmd[0x85] = "ata_command_passthrough(16)"; + scsi_cmd[0x86] = "access_control_in"; + scsi_cmd[0x87] = "access_control_out"; + scsi_cmd[0x88] = "read(16)"; + scsi_cmd[0x8a] = "write(16)"; + scsi_cmd[0x8b] = "orwrite"; + scsi_cmd[0x8c] = "read_attribute"; + scsi_cmd[0x8d] = "write_attribute"; + scsi_cmd[0x8e] = "write_verify(16)"; + scsi_cmd[0x8f] = "verify(16)"; + scsi_cmd[0x90] = "prefetch(16)"; + scsi_cmd[0x91] = "synchronize_cache(16)"; + scsi_cmd[0x92] = "space(16)/lock_unlock_cache(16)"; + scsi_cmd[0x93] = "write_same(16)"; + scsi_cmd[0x9e] = "service_action_in(16)"; + scsi_cmd[0x9f] = "service_action_out(16)"; + scsi_cmd[0xa0] = "report_luns"; + scsi_cmd[0xa1] = "ata_command_passthrough(12)"; + scsi_cmd[0xa2] = "security_protocol_in"; + scsi_cmd[0xa3] = "report_supported_opcodes"; + scsi_cmd[0xa4] = "maintenance_out"; + scsi_cmd[0xa5] = "move_medium"; + scsi_cmd[0xa6] = "exchange_medium"; + scsi_cmd[0xa7] = "move_medium_attached"; + scsi_cmd[0xa8] = "read(12)"; + scsi_cmd[0xa9] = "service_action_out(12)"; + scsi_cmd[0xaa] = "write(12)"; + scsi_cmd[0xab] = "service_action_in(12)"; + scsi_cmd[0xac] = "get_performance/erase(12)"; + scsi_cmd[0xad] = "read_dvd_structure"; + scsi_cmd[0xae] = "write_verify(12)"; + scsi_cmd[0xaf] = "verify(12)"; + scsi_cmd[0xb0] = "search_data_high(12)"; + scsi_cmd[0xb1] = "search_data_equal(12)"; + scsi_cmd[0xb2] = "search_data_low(12)"; + scsi_cmd[0xb3] = "set_limits(12)"; + scsi_cmd[0xb4] = "read_element_status_attached"; + scsi_cmd[0xb5] = "security_protocol_out"; + scsi_cmd[0xb6] = "send_volume_tag"; + scsi_cmd[0xb7] = "read_defect_data(12)"; + scsi_cmd[0xb8] = "read_element_status"; + scsi_cmd[0xb9] = "read_cd_msf"; + scsi_cmd[0xba] = "redundancy_group_in"; + scsi_cmd[0xbb] = "redundancy_group_out"; + scsi_cmd[0xbc] = "spare_in"; + scsi_cmd[0xbd] = "spare_out"; + scsi_cmd[0xbe] = "volume_set_in"; + scsi_cmd[0xbf] = "volume_set_out"; + + /* Key codes */ + key_code[0x0] = "no_sense"; + key_code[0x1] = "soft_error"; + key_code[0x2] = "not_ready"; + key_code[0x3] = "medium_error"; + key_code[0x4] = "hardware_error"; + key_code[0x5] = "illegal_request"; + key_code[0x6] = "unit_attention"; + key_code[0x7] = "data_protect"; + key_code[0x8] = "blank_check"; + key_code[0x9] = "vendor_specific"; + key_code[0xa] = "copy_aborted"; + key_code[0xb] = "aborted_command"; + /* key_code[0xc] is obsolete */ + key_code[0xd] = "volume_overflow"; + key_code[0xe] = "miscompare"; + /* key_code[0xf] is reserved */ + + /* kcq codes - of the form key/asc/ascq */ + /* http://en.wikipedia.org/wiki/Key_Code_Qualifier */ + kcq_code[0x0,0x0,0x0] = "no error"; + kcq_code[0x5,0x24,0x0] = "illegal field in CDB"; + kcq_code[0x6,0x29,0x0] = "POR or device reset occurred"; + kcq_code[0x6,0x29,0x1] = "POR occurred"; + + /* IDM status codes */ + status[IDM_STATUS_FAIL] = "FAIL"; + status[IDM_STATUS_NORESOURCES] = "NORESOURCES"; + status[IDM_STATUS_REJECT] = "REJECT"; + status[IDM_STATUS_IO] = "IO"; + status[IDM_STATUS_ABORTED] = "ABORTED"; + status[IDM_STATUS_SUSPENDED] = "SUSPENDED"; + status[IDM_STATUS_HEADER_DIGEST] = "HEADER_DIGEST"; + status[IDM_STATUS_DATA_DIGEST] = "DATA_DIGEST"; + status[IDM_STATUS_PROTOCOL_ERROR] = "PROTOCOL_ERROR"; + status[IDM_STATUS_LOGIN_FAIL] = "LOGIN_FAIL"; + + printf("%-20s %-20s %s\n", "TIME", "CLIENT", "ERROR"); +} + +fbt::idm_pdu_complete:entry +/arg1 != IDM_STATUS_SUCCESS/ +{ + this->ic = args[0]->isp_ic; + this->remote = (this->ic->ic_raddr.ss_family == AF_INET) ? + inet_ntoa((ipaddr_t *)&((struct sockaddr_in *)& + this->ic->ic_raddr)->sin_addr) : + inet_ntoa6(&((struct sockaddr_in6 *)& + this->ic->ic_raddr)->sin6_addr); + + this->err = status[arg1] != NULL ? status[arg1] : lltostr(arg1); + printf("%-20Y %-20s %s\n", walltimestamp, this->remote, this->err); +} + +fbt::stmf_send_scsi_status:entry +/ args[0]->task_mgmt_function == 0 && args[0]->task_sense_data != 0 / +{ + /* TODO: get the client address */ + printf("%-20Y %-20s ", walltimestamp, "-"); + this->code = args[0]->task_cdb[0]; + this->cmd = scsi_cmd[this->code] != NULL ? + scsi_cmd[this->code] : lltostr(this->code); + printf("CMD=%s, ", this->cmd); + this->key_id = (unsigned)args[0]->task_sense_data[2]; + this->asc_id = (unsigned)args[0]->task_sense_data[12]; + this->ascq_id = (unsigned)args[0]->task_sense_data[13]; + this->key = key_code[this->key_id] != NULL ? key_code[this->key_id] : "TBD"; + this->kcq = kcq_code[this->key_id,this->asc_id,this->ascq_id] != NULL ? + kcq_code[this->key_id,this->asc_id,this->ascq_id] : "TBD"; + printf("Check condition, Key/ASC/ASCQ = %x/%x/%x (%s/%s)\n", + this->key_id, this->asc_id, this->ascq_id,this->key, this->kcq); +} diff --git a/iscsiwho.d b/iscsiwho.d new file mode 100644 index 0000000..416fd68 --- /dev/null +++ b/iscsiwho.d @@ -0,0 +1,33 @@ +#!/usr/sbin/dtrace -s +/* + * iscsiwho.d - Report iSCSI client events. Solaris Nevada, DTrace. + * + * This traces requested iSCSI events when run on an iSCSI server. + * + * USAGE: iscsiwho.d # Hit Ctrl-C to end + * + * FIELDS: + * REMOTE IP IP address of the client + * iSCSI EVENT iSCSI event type. + * COUNT Number of events traced + */ + +#pragma ident "@(#)iscsiwho.d 1.3 07/03/27 SMI" + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +iscsi*::: +{ + @events[args[0]->ci_remote, probename] = count(); +} + +dtrace:::END +{ + printf(" %-26s %14s %8s\n", "REMOTE IP", "iSCSI EVENT", "COUNT"); + printa(" %-26s %14s %@8d\n", @events); +} diff --git a/learn-d.d b/learn-d.d new file mode 100644 index 0000000..c304653 --- /dev/null +++ b/learn-d.d @@ -0,0 +1,6 @@ +BEGIN +{ + trace("hello world."); + exit(0); + +} \ No newline at end of file diff --git a/loadavg.d b/loadavg.d new file mode 100644 index 0000000..dd22d19 --- /dev/null +++ b/loadavg.d @@ -0,0 +1,17 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet + +profile:::tick-1sec + { + this->load1a = `hp_avenrun[0] / 65536; + this->load5a = `hp_avenrun[1] / 65536; + this->load15a = `hp_avenrun[2] / 65536; + this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536; + this->load5b = ((`hp_avenrun[1] % 65536) * 100) / 65536; + this->load15b = ((`hp_avenrun[2] % 65536) * 100) / 65536; + + printf("%.05d %d.%02d %d.%02d %d.%02d\n", + walltimestamp/1000000000, this->load1a, this->load1b, this->load5a, + this->load5b, this->load15a, this->load15b); + + } \ No newline at end of file diff --git a/metaslab_free.d b/metaslab_free.d new file mode 100644 index 0000000..422b40c --- /dev/null +++ b/metaslab_free.d @@ -0,0 +1,53 @@ +#!/usr/sbin/dtrace -s +/* + * metaslab_free.d Show ZFS metaslab percent free on allocations. + * + * ZFS switches to a slower allocation algorithm when the free size in a + * metaslab (usually 16 Gbytes) is less than a percentage. The slower + * algorithm is best-fit instead of fast-fit. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing ZFS metaslab alloc. metaslab_df_free_pct = %d %%\n", + `metaslab_df_free_pct); +} + +fbt::metaslab_df_alloc:entry +{ + this->pct = args[0]->sm_space * 100 / args[0]->sm_size; + @[this->pct] = count(); +} + +profile:::tick-1s +{ + printf("\n%Y free %%pct by allocations:", walltimestamp); + printa(@); + trunc(@); +} \ No newline at end of file diff --git a/nfs-l.d b/nfs-l.d new file mode 100644 index 0000000..7013236 --- /dev/null +++ b/nfs-l.d @@ -0,0 +1,252 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=1000hz +#pragma D option dynvarsize=512m + + +inline int TICKS=$1; +inline string ADDR=$$2; + +dtrace:::BEGIN +{ + TIMER = ( TICKS != NULL ) ? TICKS : 1 ; + ticks = TIMER; + TITLE=10; + title = 0; + walltime= walltimestamp/1000000000; + type="R"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + type="W"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + @nfs_rmx=max(0); + @nfs_wmx=max(0); + @zfs_rmx=max(0); + @zfs_wmx=max(0); + @io_rmx=max(0); + @io_wmx=max(0); +} + +/* ===================== beg TCP ================================= */ +tcp:::send +/ ( ADDR == NULL || args[3]->tcps_raddr == ADDR ) && args[2]->ip_plength - args[4]->tcp_offset > 0 / +{ + this->type="R"; + tcp_ct[this->type]++; + tcp_sz[this->type]=tcp_sz[this->type]+ args[2]->ip_plength - args[4]->tcp_offset; + @tcprsz=quantize(args[2]->ip_plength - args[4]->tcp_offset); +} +tcp:::receive +/ ( ADDR == NULL || args[3]->tcps_raddr == ADDR ) && args[2]->ip_plength - args[4]->tcp_offset > 0 / +{ + this->type="W"; + tcp_ct[this->type]++; + tcp_sz[this->type]=tcp_sz[this->type]+ args[2]->ip_plength - args[4]->tcp_offset; + @tcpwsz=quantize(args[2]->ip_plength - args[4]->tcp_offset); +} +/* ===================== end TCP ================================= */ + +/* ===================== beg NFS ================================= */ +nfsv3:::op-read-start, nfsv3:::op-write-start ,nfsv4:::op-read-start +{ + tm[args[1]->noi_xid] = timestamp; + sz[args[1]->noi_xid] = args[2]->count ; +} +nfsv4:::op-write-start +{ + tm[args[1]->noi_xid] = timestamp; + sz[args[1]->noi_xid] = args[2]->data_len ; +} +nfsv3:::op-read-done, nfsv3:::op-write-done, nfsv4:::op-read-done, nfsv4:::op-write-done +/tm[args[1]->noi_xid]/ +{ + this->delta= timestamp - tm[args[1]->noi_xid]; + this->type = probename == "op-write-done" ? "W" : "R"; + nfs_tm[this->type]=nfs_tm[this->type]+this->delta; + @nfs_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @nfs_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + nfs_ct[this->type]++; + nfs_sz[this->type]=nfs_sz[this->type]+ sz[args[1]->noi_xid]; + tm[args[1]->noi_xid] = 0; + sz[args[1]->noi_xid] = 0; +} +/* --------------------- end NFS --------------------------------- */ + +/* ===================== beg ZFS ================================= */ + +zfs_read:entry,zfs_write:entry +{ + self->ts = timestamp; + self->filepath = args[0]->v_path; + self->size = ((uio_t *)arg1)->uio_resid; +} + + +zfs_read:return,zfs_write:return +/self->ts / +{ + this->type = probefunc == "zfs_write" ? "W" : "R"; + this->delta=timestamp - self->ts ; + zfs_tm[this->type]= zfs_tm[this->type] + this->delta; + zfs_ct[this->type]++; + zfs_sz[this->type]=zfs_sz[this->type]+self->size; + @zfs_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @zfs_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + self->ts=0; + self->filepath=0; + self->size=0; +} +/* --------------------- end ZFS --------------------------------- */ + + + +/* ===================== beg IO ================================= */ +io:::start +/ arg0 != NULL && args[0]->b_addr != 0 / +{ + tm_io[(struct buf *)arg0] = timestamp; + sz_io[(struct buf *)arg0] = args[0]->b_bcount; + +} +io:::done +/tm_io[(struct buf *)arg0]/ +{ + this->type = args[0]->b_flags & B_READ ? "R" : "W" ; + this->delta = ( timestamp - tm_io[(struct buf *)arg0]); + io_tm[this->type]=io_tm[this->type]+this->delta; + @io_rmx=max( (this->type == "R" ? this->delta : 0)/1000000); + @io_wmx=max( (this->type == "W" ? this->delta : 0)/1000000); + io_ct[this->type]++; + io_sz[this->type]=io_sz[this->type]+ sz_io[(struct buf *)arg0] ; + sz_io[(struct buf *)arg0] = 0; + tm_io[(struct buf *)arg0] = 0; +} +/* --------------------- end IO --------------------------------- */ + + + +profile:::tick-1sec / ticks > 0 / { ticks--; } + +profile:::tick-1sec +/ ticks == 0 / +{ + type="R"; + printf("-------------avg_ms--------MB/s--IO/sz/kb--max_ms--- summary\n"); + printf(" %s | IO : %4d.%03d %6d.%03d %4d ",type, + (io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000000 , + ((io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000) % 1000, + (io_sz[type]/TIMER)/1000000, + ((io_sz[type]/TIMER)/1000)%1000, + (io_ct[type] == 0 ? 0 : io_sz[type]/io_ct[type])/1000 ); + printa(" %@8u summary\n", @io_rmx); + printf(" %s | ZFS : %4d.%03d %6d.%03d %4d ",type, + (zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000000, + ((zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000) % 1000, + (zfs_sz[type]/TIMER)/1000000, + ((zfs_sz[type]/TIMER)/1000)%1000, + (zfs_ct[type] == 0 ? 0 : zfs_sz[type]/zfs_ct[type])/1000); + printa(" %@8u summary\n", @zfs_rmx); + printf(" %s | NFS : %4d.%03d %6d.%03d %4d ",type, + (nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000000, + ((nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000) % 1000, + (nfs_sz[type]/TIMER)/1000000, + ((nfs_sz[type]/TIMER)/1000)%1000, + (nfs_ct[type] == 0 ? 0 : nfs_sz[type]/nfs_ct[type])/1000); + printa(" %@8u summary ", @nfs_rmx ); + printf(" %8d \n", nfs_ct[type]); + printf(" %s | TCP : %6d.%03d ",type, + (tcp_sz[type]/TIMER)/1000000, + ((tcp_sz[type]/TIMER)/1000)%1000 + ); + printf(" summary %d\n",tcp_ct[type]/TIMER); + type="W"; + printf(" ------ summary\n"); + printf(" %s | IO : %4d.%03d %6d.%03d %4d ",type, + (io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000000 , + ((io_sz[type]/8196 == 0 ? 0 : io_tm[type]/(io_sz[type]/8196))/1000) % 1000, + (io_sz[type]/TIMER)/1000000, + ((io_sz[type]/TIMER)/1000)%1000, + (io_ct[type] == 0 ? 0 : io_sz[type]/io_ct[type])/1000 ); + printa(" %@8u summary\n", @io_wmx); + printf(" %s | ZFS : %4d.%03d %6d.%03d %4d ",type, + (zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000000, + ((zfs_sz[type]/8196 == 0 ? 0 : zfs_tm[type]/(zfs_sz[type]/8196))/1000) % 1000, + (zfs_sz[type]/TIMER)/1000000, + ((zfs_sz[type]/TIMER)/1000)%1000, + (zfs_ct[type] == 0 ? 0 : zfs_sz[type]/zfs_ct[type])/1000); + printa(" %@8u summary\n", @zfs_wmx); + printf(" %s | NFS : %4d.%03d %6d.%03d %4d ",type, + (nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000000, + ((nfs_sz[type]/8196 == 0 ? 0 : nfs_tm[type]/(nfs_sz[type]/8196))/1000) % 1000, + (nfs_sz[type]/TIMER)/1000000, + ((nfs_sz[type]/TIMER)/1000)%1000, + (nfs_ct[type] == 0 ? 0 : nfs_sz[type]/nfs_ct[type])/1000); + printa(" %@8u summary\n", @nfs_wmx); + printf(" %s | TCP : %6d.%03d ",type, + (tcp_sz[type]/TIMER)/1000000, + ((tcp_sz[type]/TIMER)/1000)%1000 + ); + printf(" summary %d\n",tcp_ct[type]/TIMER); + printf(" IOPs %u summary\n", (io_ct["W"]+io_ct["R"])/TIMER); +/* + printa("tcp in (sz) %@d\n",@tcpwsz); + printa("tcp out (sz) %@d\n",@tcprsz); +*/ + type="R"; + trunc(@tcpwsz); + trunc(@tcprsz); + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + type="W"; + io_tm[type]=0; + io_ct[type]=0; + io_sz[type]=0; + nfs_tm[type]=0; + nfs_ct[type]=0; + nfs_sz[type]=0; + zfs_tm[type]=0; + zfs_ct[type]=0; + zfs_sz[type]=0; + tcp_ct[type]=0; + tcp_sz[type]=0; + ticks= TIMER; + title--; + clear(@nfs_rmx); + clear(@nfs_wmx); + clear(@zfs_rmx); + clear(@zfs_wmx); + clear(@io_rmx); + clear(@io_wmx); +} + +/* use if you want to print something every TITLE lines */ +profile:::tick-1sec / title <= 0 / { title=TITLE; } \ No newline at end of file diff --git a/nfsrpcattr.d b/nfsrpcattr.d new file mode 100644 index 0000000..b32531f --- /dev/null +++ b/nfsrpcattr.d @@ -0,0 +1,31 @@ +syscall:::entry +{ + + self->syscallname = probefunc; +} + +syscall:::return +{ + + self->syscallname = ""; +} + +nfsclient::: +/self->syscallname != 0 && self->syscallname != ""/ +{ + + trace(probemod); + trace(arg0); + trace(execname); + trace(self->syscallname); +} + +nfsclient::: +/self->syscallname == 0 || self->syscallname == ""/ +{ + + trace(probemod); + trace(arg0); + trace(execname); + trace("-"); +} diff --git a/nfsv3errors.d b/nfsv3errors.d new file mode 100644 index 0000000..e625cf9 --- /dev/null +++ b/nfsv3errors.d @@ -0,0 +1,70 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + /* See NFS3ERR_* in /usr/include/nfs/nfs.h */ + nfs3err[0] = "NFS3_OK"; + nfs3err[1] = "PERM"; + nfs3err[2] = "NOENT"; + nfs3err[5] = "IO"; + nfs3err[6] = "NXIO"; + nfs3err[13] = "ACCES"; + nfs3err[17] = "EXIST"; + nfs3err[18] = "XDEV"; + nfs3err[19] = "NODEV"; + nfs3err[20] = "NOTDIR"; + nfs3err[21] = "ISDIR"; + nfs3err[22] = "INVAL"; + nfs3err[27] = "FBIG"; + nfs3err[28] = "NOSPC"; + nfs3err[30] = "ROFS"; + nfs3err[31] = "MLINK"; + nfs3err[63] = "NAMETOOLONG"; + nfs3err[66] = "NOTEMPTY"; + nfs3err[69] = "DQUOT"; + nfs3err[70] = "STALE"; + nfs3err[71] = "REMOTE"; + nfs3err[10001] = "BADHANDLE"; + nfs3err[10002] = "NOT_SYNC"; + nfs3err[10003] = "BAD_COOKIE"; + nfs3err[10004] = "NOTSUPP"; + nfs3err[10005] = "TOOSMALL"; + nfs3err[10006] = "SERVERFAULT"; + nfs3err[10007] = "BADTYPE"; + nfs3err[10008] = "JUKEBOX"; + + printf(" %-18s %5s %-12s %-16s %s\n", "NFSv3 EVENT", "ERR", "CODE", + "CLIENT", "PATHNAME"); +} + +nfsv3:nfssrv::op-commit-done, +nfsv3:nfssrv::op-pathconf-done, +nfsv3:nfssrv::op-fsinfo-done, +nfsv3:nfssrv::op-fsstat-done, +nfsv3:nfssrv::op-readdirplus-done, +nfsv3:nfssrv::op-readdir-done, +nfsv3:nfssrv::op-link-done, +nfsv3:nfssrv::op-rename-done, +nfsv3:nfssrv::op-rmdir-done, +nfsv3:nfssrv::op-remove-done, +nfsv3:nfssrv::op-mknod-done, +nfsv3:nfssrv::op-symlink-done, +nfsv3:nfssrv::op-mkdir-done, +nfsv3:nfssrv::op-create-done, +nfsv3:nfssrv::op-write-done, +nfsv3:nfssrv::op-read-done, +nfsv3:nfssrv::op-readlink-done, +nfsv3:nfssrv::op-access-done, +nfsv3:nfssrv::op-lookup-done, +nfsv3:nfssrv::op-setattr-done, +nfsv3:nfssrv::op-getattr-done +/args[2]->status != 0/ +{ + this->err = args[2]->status; + this->str = nfs3err[this->err] != NULL ? nfs3err[this->err] : "?"; + printf(" %-18s %5d %-12s %-16s %s\n", probename, this->err, + this->str, args[0]->ci_remote, args[1]->noi_curpath); +} diff --git a/nfsv3io.d b/nfsv3io.d new file mode 100644 index 0000000..3406d04 --- /dev/null +++ b/nfsv3io.d @@ -0,0 +1,48 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + interval = 5; + printf("Tracing... Interval %d secs.\n", interval); + tick = interval; +} + +nfsv3:::op-* +{ + @ops[args[0]->ci_remote] = count(); +} + +nfsv3:::op-read-done +{ + @reads[args[0]->ci_remote] = count(); + @readbytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len); +} + + +nfsv3:::op-write-done +{ + @writes[args[0]->ci_remote] = count(); + @writebytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.count); +} + +profile:::tick-1sec +/tick-- == 0/ +{ + normalize(@ops, interval); + normalize(@reads, interval); + normalize(@writes, interval); + normalize(@writebytes, 1024 * interval); + normalize(@readbytes, 1024 * interval); + printf("\n %-32s %6s %6s %6s %6s %8s\n", "Client", "r/s", "w/s", + "kr/s", "kw/s", "ops/s"); + printa(" %-32s %@6d %@6d %@6d %@6d %@8d\n", @reads, @writes, + @readbytes, @writebytes, @ops); + trunc(@ops); + trunc(@reads); + trunc(@writes); + trunc(@readbytes); + trunc(@writebytes); + tick = interval; +} \ No newline at end of file diff --git a/nfsv3ops.d b/nfsv3ops.d new file mode 100644 index 0000000..93f7467 --- /dev/null +++ b/nfsv3ops.d @@ -0,0 +1,21 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + trace("Tracing... Interval 5 secs.\n"); +} + +nfsv3:::op-* +{ + @ops[args[0]->ci_remote, probename] = count(); +} + +profile:::tick-5sec, +dtrace:::END +{ + printf("\n %-32s %-28s %8s\n", "Client", "Operation", "Count"); + printa(" %-32s %-28s %@8d\n", @ops); + trunc(@ops); +} \ No newline at end of file diff --git a/nfsv3rwsnoop-2.d b/nfsv3rwsnoop-2.d new file mode 100644 index 0000000..6a1b172 --- /dev/null +++ b/nfsv3rwsnoop-2.d @@ -0,0 +1,27 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option switchrate=10hz +dtrace:::BEGIN +{ + printf("%16s\t%20s\t%-18s\t%2s\t%-10s\t%6s\t%s\t%s\n", "TIME(us)", "TIME", + "CLIENT", "OP", "OFFSET(KB)", "BYTES", "4K-ALIGNED", "4K-SIZE"); +} +nfsv3:::op-read-start +{ + printf("%-16d\t%Y\t%-18s\t%2s\t%-10d\t%6d\t%s\t%s\n", + timestamp / 1000, walltimestamp, + args[0]->ci_remote, "R", args[2]->offset / 1024, + args[2]->count, + (args[2]->offset % 4096) == 0 ? "1" : "0", + (args[2]->count % 4096) == 0 ? "1" : "0"); +} +nfsv3:::op-write-start +{ + printf("%-16d\t%Y\t%-18s\t%2s\t%-10d\t%6d\t%s\t%s\n", + timestamp / 1000, walltimestamp, + args[0]->ci_remote, "W", args[2]->offset / 1024, + args[2]->data.data_len, + (args[2]->offset % 4096) == 0 ? "1" : "0", + (args[2]->data.data_len % 4096) == 0 ? "1" : "0"); +} + diff --git a/processtrace.d b/processtrace.d new file mode 100644 index 0000000..b3db9a6 --- /dev/null +++ b/processtrace.d @@ -0,0 +1,30 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10 + +proc:::create +{ + printf("%s %d %d 0 0 %s fork - - %d\n", zonename, pid, args[0]->pr_pid, + execname, timestamp); + start[args[0]->pr_pid] = timestamp; +} + +proc:::exec-success +{ + printf("%s %d %d 0 0 %s exec - - %d\n", zonename, ppid, pid, + execname, timestamp); +} + +proc:::exit +/(this->s = start[pid])/ +{ + printf("%s %d %d 0 0 %s exit %d ms %d\n", zonename, ppid, pid, + execname, (timestamp - this->s) / 1000000, timestamp); + start[pid] = 0; +} + +profile:::tick-1s +{ + printf("- - - - - - tick 1s - %d\n", timestamp); +} diff --git a/rwtime.d b/rwtime.d new file mode 100644 index 0000000..af22510 --- /dev/null +++ b/rwtime.d @@ -0,0 +1,12 @@ +syscall::read:entry, +syscall::write:entry +/pid == $1/ +{ + ts[probefunc] = timestamp; +} +syscall::read:return, +syscall::write:return +/pid == $1 && ts[probefunc] != 0/ +{ + printf("%d nsecs", timestamp - ts[probefunc]); +} \ No newline at end of file diff --git a/rwtimesize.d b/rwtimesize.d new file mode 100644 index 0000000..8a851e3 --- /dev/null +++ b/rwtimesize.d @@ -0,0 +1,56 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +io:::start +{ + start_time[arg0] = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + trig = args[0]->b_bufsize; +} + +io:::done +/(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) && trig > 0/ +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average read, bytes"] = avg(this->size); + @plots["read I/O, us"] = quantize(this->delta); + @avgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +io:::done +/!(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) && trig > 0/ +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average write, bytes"] = avg(this->size); + @plots["write I/O, us"] = quantize(this->delta); + @avgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +dtrace:::END +/trig > 0/ +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); + trig = 0; +} diff --git a/scsi.d b/scsi.d new file mode 100644 index 0000000..b31a210 --- /dev/null +++ b/scsi.d @@ -0,0 +1,942 @@ +#!/usr/sbin/dtrace -qCs + +/* + * Chris.Gerhard@sun.com + * Joel.Buckley@sun.com + */ + +#pragma ident "@(#)scsi.d 1.18 09/03/20 SMI" + +/* + * SCSI logging via dtrace. + * + * See http://blogs.sun.com/chrisg/tags/scsi.d + * + * Usage: + * + * scsi.d [ @T time ][ hba [ X ]] + * + * With no arguments it logs every scsi packet out and as many returns + * as it can find. + * + * So to trace all the io going via instance 2 qus driver do: + * + * scsi.d qus 2 + * + * To do the same trace for 20 seconds do: + * + * scsi.d @T 20 qus 2 + * + * NOTE: the "@" is used instead of the more traditional "-" as dtrace will + * continue to parse arguments so would see "-T" as an argument to dtrace and + * produce a usage error. + * + * Temporarily, ie I intend to change this in the future, scsi.d is also + * taking some more arguments using the -D options as well. I suspect I will + * get rid of the bizarre @T options above and change to using -D completely + * but that will be some point in the future, if at all. + * + * The new options are: + * + * -D EXECNAME='"foo"' + * Which results scsi.d only reporting IO associated with the application "foo". + * -D PRINT_STACK + * Which results in scsi.d printing a kernel stack trace after every outgoing + * packet. + * -D QUIET + * Which results in none of the packets being printed. Kind of pointless + * without another option. + * -D PERF_REPORT + * Which results in a report of how long IOs took aggregated per HBA useful + * with -D QUIET to get performance statistics. + * -D TARGET_STATS + * aggregate the stats based on the target. + * -D LUN_STATS + * aggregate the stats based on the LUN. Requires TARGET_STATS + * -D DYNVARSIZE + * pass this value to the #pragma D option dynvarsize= option. + * -D HBA + * the name of the HBA we are interested in. + * -D MIN_LBA + * Only report logical blocks over this value + * -D MAX_LBA + * Only IOs to report logical blocks that are less than this value. + * -D REPORT_OVERTIME=N + * Only report IOs that have taken longer than this number of nanoseconds. + * This only stops the printing of the packets not the collection of + * statistics. + * There are some tuning options that take effect only when + * REPORT_OVERTIME is set. These are: + * -D NSPEC=N + * Set the number of speculations to this value. + * -D SPECSIZE=N + * Set the size of the speculaton buffer. This should be 200 * + * the size of NSPEC. + * -D CLEANRATE=N + * Specify the clean rate. + * + * Finally scsi.d will also now accept the dtrace -c and -p options to trace + * just the commands or process given. + * + * Since dtrace does not output in real time it is useful to sort the output + * of the script using sort -n to get the entries in chronological order. + * + * NOTE: This does not directly trace what goes onto the scsi bus or fibre, + * to do so would require this script have knowledge of every HBA that could + * ever be connected to a system. It traces the SCSI packets as they are + * passed from the target driver to the HBA in the SCSA layer and then back + * again. Although to get the packet when it is returned it guesses that the + * packet will be destroyed using scsi_destroy_pkt and not modified before it + * is. So far this has worked but there is no garauntee that it will work for + * all HBAs and target drivers in the future. + * + */ + +/* + * This can be used as a framework to report on any rountine that has + * a scsi_pkt passed into it. Search for "FRAMEWORK" to see where you + * have to add bits. + */ +#pragma D option quiet +#pragma D option defaultargs + +#ifdef DYNVARSIZE +#pragma D option dynvarsize=DYNVARSIZE +#endif + +#ifdef REPORT_OVERTIME + +#ifdef NSPEC +#pragma D option nspec=NSPEC +#else +#pragma D option nspec=1000 +#endif /* NSPEC */ + + +#ifdef SPECSIZE +#pragma D option specsize=SPECSIZE +#else +#pragma D option specsize=200000 +#endif /* SPECSIZE */ + +#ifdef CLEANRATE +#pragma D option cleanrate=CLEANRATE +#endif /* CLEANRATE */ + +#endif /* REPORT_OVERTIME */ + +/* + * Have to include scsi_types.h so that in releases where scsi_address + * contains a union and #defines for a_target, a_lun & a_sublun they get + * picked up. + */ +#include + +/* + * Now work around CR 6798038. Depending on how that CR is fixed this + * workaround may have to be revisited so that the script will work in + * all cases. + */ +#ifdef a_lun +#undef a_lun +#define my_alun a.spi.a_lun +#else +#define my_alun a_lun +#endif + +#ifdef a_target +#undef a_target +#define my_atarget a.spi.a_target +#else +#define my_atarget a_target +#endif + +/* + * Just for completeness. The script does not use sublun so these are not + * needed but do no harm. + */ +#ifdef a_sublun +#undef a_sublun +#define my_asublun a.spi.a_sublun +#else +#define my_asublun a_sublun +#endif + +/* + * Set both lba & len to 64Bit values, to avoid type mismatch later... + * + * The following does not function due to DTrace insufficient registers... + * + * #define INT64(A, B, C, D, E, F, G, H) ( \ + * (((A) & 0x0ffL) << 56) | \ + * (((B) & 0x0ffL) << 48) | \ + * (((C) & 0x0ffL) << 40) | \ + * (((D) & 0x0ffL) << 32) | \ + * (((E) & 0x0ffL) << 24) | \ + * (((F) & 0x0ffL) << 16) | \ + * (((G) & 0x0ffL) << 8) | \ + * ((H) & 0x0ffL)) + * + * Instead, calculate lower & upper values, then pass them into this macro: + */ +#define INT64(A, B) ( \ + (((A) & 0x0ffffffffLL) << 32) | \ + ((B) & 0x0ffffffffL)) + +#define INT32(A, B, C, D) ( \ + (((A) & 0x0ffL) << 24) | \ + (((B) & 0x0ffL) << 16) | \ + (((C) & 0x0ffL) << 8) | \ + ((D) & 0x0ffL)) +#define INT16(A, B) ( \ + (((A) & 0x0ffL) << 8) | \ + ((B) & 0x0ffL)) +#define INT8(A) ( \ + ((A) & 0x0ffL)) + +/* #define A_TO_TRAN(ap) ((ap)->a_hba_tran) */ +#define P_TO_TRAN(pkt) ((pkt)->pkt_address.a_hba_tran) +/* #define P_TO_ADDR(pkt) (&((pkt)->pkt_address)) */ +#define P_TO_DEVINFO(pkt) ((struct dev_info *)(P_TO_TRAN(pkt)->tran_hba_dip)) + +#define DEV_NAME(pkt) \ + stringof(`devnamesp[P_TO_DEVINFO(pkt)->devi_major].dn_name) /* ` */ + +#define DEV_INST(pkt) (P_TO_DEVINFO(pkt)->devi_instance) + +#ifdef MIN_BLOCK +#define MIN_TEST && this->lba >= (MIN_BLOCK) +#else +#define MIN_TEST +#endif +#ifdef MAX_BLOCK +#define MAX_TEST && (this->lba <= ((MAX_BLOCK) + this->len)) +#else +#define MAX_TEST +#endif + +#define ARGUMENT_TEST (this->pkt && (hba_set == 0 || \ + hba == DEV_NAME(this->pkt) && \ + (inst_set == 0 || inst == DEV_INST(this->pkt))) MIN_TEST MAX_TEST) + +/* + * PRINT_DEV_FROM_PKT + * + * From a scsi_pkt get the name of the driver and the instance of the + * hba. + */ +#define PRINT_DEV_FROM_PKT(pkt) \ + printf("%s%d:", DEV_NAME(pkt), DEV_INST(pkt)) + +#define PRINT_TIMESTAMP() printf("%5.5d.%9.9d ", \ + (timestamp - script_start_time)/1000000000, \ + (timestamp - script_start_time)%1000000000); + +/* + * Look away now. + * + * This is just hackery to get around me being so stubborn I + * don't want to wrap this in a shell script. + */ +#ifdef HBA +BEGIN { + hba = HBA; + hba_set = 1; + timeout = 0; + inst_set = 0; + inst = 0; +} +#else + +#define TIMEOUT_SELECT "@T" + +#define DO_ARGS(N, NS, M, MS) \ +BEGIN \ +/ hba_set == 0 && NS != "" && NS != TIMEOUT_SELECT / \ +{ \ + hba = NS; \ + hba_set = 1; \ +} \ +BEGIN \ +/ hba_set == 1 && inst_set == 0 && MS != "" / \ +{ \ + inst = M; \ + inst_set = 1; \ +} \ + +BEGIN +/ $$1 == TIMEOUT_SELECT / +{ + timeout = $2; +} + +#endif + +BEGIN +/ $$target != "" / +{ + target = $target; +} + +BEGIN +{ +#ifdef EXECNAME + scsi_d_target_execname = EXECNAME; +#else + scsi_d_target_execname = ""; +#endif +} +#ifndef HBA +DO_ARGS($1, $$1, $2, $$2) +DO_ARGS($3, $$3, $4, $$4) +#endif + +/* + * You can open your eyes again now. + */ + +BEGIN { +#ifdef REPORT_OVERTIME + printf("Only reporting IOs longer than %dns\n", REPORT_OVERTIME); +#endif + printf("Hit Control C to interrupt\n"); + script_start_time = timestamp; + end_time = timestamp + (timeout * 1000000000); + + scsi_ops[0x000, 0x0] = "TEST_UNIT_READY"; + scsi_ops[0x001, 0x0] = "REZERO_UNIT_or_REWIND"; + scsi_ops[0x003, 0x0] = "REQUEST_SENSE"; + scsi_ops[0x004, 0x0] = "FORMAT_UNIT"; + scsi_ops[0x005, 0x0] = "READ_BLOCK_LIMITS"; + scsi_ops[0x006, 0x0] = "Unknown(06)"; + scsi_ops[0x007, 0x0] = "REASSIGN_BLOCKS"; + scsi_ops[0x008, 0x0] = "READ(6)"; + scsi_ops[0x009, 0x0] = "Unknown(09)"; + scsi_ops[0x00a, 0x0] = "WRITE(6)"; + scsi_ops[0x00b, 0x0] = "SEEK(6)"; + scsi_ops[0x00c, 0x0] = "Unknown(0c)"; + scsi_ops[0x00d, 0x0] = "Unknown(0d)"; + scsi_ops[0x00e, 0x0] = "Unknown(0e)"; + scsi_ops[0x00f, 0x0] = "READ_REVERSE"; + scsi_ops[0x010, 0x0] = "WRITE_FILEMARK"; + scsi_ops[0x011, 0x0] = "SPACE"; + scsi_ops[0x012, 0x0] = "INQUIRY"; + scsi_ops[0x013, 0x0] = "VERIFY"; + scsi_ops[0x014, 0x0] = "Unknown(14)"; + scsi_ops[0x015, 0x0] = "MODE_SELECT(6)"; + scsi_ops[0x016, 0x0] = "RESERVE(6)"; + scsi_ops[0x017, 0x0] = "RELEASE(6)"; + scsi_ops[0x018, 0x0] = "COPY"; + scsi_ops[0x019, 0x0] = "ERASE(6)"; + scsi_ops[0x01a, 0x0] = "MODE_SENSE(6)"; + scsi_ops[0x01b, 0x0] = "START_STOP_UNIT"; + scsi_ops[0x01c, 0x0] = "RECIEVE_DIAGNOSTIC_RESULTS"; + scsi_ops[0x01d, 0x0] = "SEND_DIAGNOSTIC"; + scsi_ops[0x01e, 0x0] = "PREVENT_ALLOW_MEDIUM_REMOVAL"; + scsi_ops[0x01f, 0x0] = "Unknown(1f)"; + scsi_ops[0x020, 0x0] = "Unknown(20)"; + scsi_ops[0x021, 0x0] = "Unknown(21)"; + scsi_ops[0x022, 0x0] = "Unknown(22)"; + scsi_ops[0x023, 0x0] = "READ_FORMAT_CAPACITY"; + scsi_ops[0x024, 0x0] = "Unknown(24)"; + scsi_ops[0x025, 0x0] = "READ_CAPACITY(10)"; + scsi_ops[0x026, 0x0] = "Unknown(26)"; + scsi_ops[0x027, 0x0] = "Unknown(27)"; + scsi_ops[0x028, 0x0] = "READ(10)"; + scsi_ops[0x02a, 0x0] = "WRITE(10)"; + scsi_ops[0x02b, 0x0] = "SEEK(10)_or_LOCATE(10)"; + scsi_ops[0x02e, 0x0] = "WRITE_AND_VERIFY(10)"; + scsi_ops[0x02f, 0x0] = "VERIFY(10)"; + scsi_ops[0x030, 0x0] = "SEARCH_DATA_HIGH"; + scsi_ops[0x031, 0x0] = "SEARCH_DATA_EQUAL"; + scsi_ops[0x032, 0x0] = "SEARCH_DATA_LOW"; + scsi_ops[0x033, 0x0] = "SET_LIMITS(10)"; + scsi_ops[0x034, 0x0] = "PRE-FETCH(10)"; + scsi_ops[0x035, 0x0] = "SYNCHRONIZE_CACHE(10)"; + scsi_ops[0x036, 0x0] = "LOCK_UNLOCK_CACHE(10)"; + scsi_ops[0x037, 0x0] = "READ_DEFECT_DATA(10)"; + scsi_ops[0x039, 0x0] = "COMPARE"; + scsi_ops[0x03a, 0x0] = "COPY_AND_WRITE"; + scsi_ops[0x03b, 0x0] = "WRITE_BUFFER"; + scsi_ops[0x03c, 0x0] = "READ_BUFFER"; + scsi_ops[0x03e, 0x0] = "READ_LONG"; + scsi_ops[0x03f, 0x0] = "WRITE_LONG"; + scsi_ops[0x040, 0x0] = "CHANGE_DEFINITION"; + scsi_ops[0x041, 0x0] = "WRITE_SAME(10)"; + scsi_ops[0x04c, 0x0] = "LOG_SELECT"; + scsi_ops[0x04d, 0x0] = "LOG_SENSE"; + scsi_ops[0x050, 0x0] = "XDWRITE(10)"; + scsi_ops[0x051, 0x0] = "XPWRITE(10)"; + scsi_ops[0x052, 0x0] = "XDREAD(10)"; + scsi_ops[0x053, 0x0] = "XDWRITEREAD(10)"; + scsi_ops[0x055, 0x0] = "MODE_SELECT(10)"; + scsi_ops[0x056, 0x0] = "RESERVE(10)"; + scsi_ops[0x057, 0x0] = "RELEASE(10)"; + scsi_ops[0x05a, 0x0] = "MODE_SENSE(10)"; + scsi_ops[0x05e, 0x0] = "PERSISTENT_RESERVE_IN"; + scsi_ops[0x05f, 0x0] = "PERSISTENT_RESERVE_OUT"; + scsi_ops[0x07f, 0x0] = "Variable_Length_CDB"; + scsi_ops[0x07f, 0x3] = "XDREAD(32)"; + scsi_ops[0x07f, 0x4] = "XDWRITE(32)"; + scsi_ops[0x07f, 0x6] = "XPWRITE(32)"; + scsi_ops[0x07f, 0x7] = "XDWRITEREAD(32)"; + scsi_ops[0x07f, 0x9] = "READ(32)"; + scsi_ops[0x07f, 0xb] = "WRITE(32)"; + scsi_ops[0x07f, 0xa] = "VERIFY(32)"; + scsi_ops[0x07f, 0xc] = "WRITE_AND_VERIFY(32)"; + scsi_ops[0x080, 0x0] = "XDWRITE_EXTENDED(16)"; + scsi_ops[0x081, 0x0] = "REBUILD(16)"; + scsi_ops[0x082, 0x0] = "REGENERATE(16)"; + scsi_ops[0x083, 0x0] = "EXTENDED_COPY"; + scsi_ops[0x086, 0x0] = "ACCESS_CONTROL_IN"; + scsi_ops[0x087, 0x0] = "ACCESS_CONTROL_OUT"; + scsi_ops[0x088, 0x0] = "READ(16)"; + scsi_ops[0x08a, 0x0] = "WRITE(16)"; + scsi_ops[0x08c, 0x0] = "READ_ATTRIBUTES"; + scsi_ops[0x08d, 0x0] = "WRITE_ATTRIBUTES"; + scsi_ops[0x08e, 0x0] = "WRITE_AND_VERIFY(16)"; + scsi_ops[0x08f, 0x0] = "VERIFY(16)"; + scsi_ops[0x090, 0x0] = "PRE-FETCH(16)"; + scsi_ops[0x091, 0x0] = "SYNCHRONIZE_CACHE(16)"; + scsi_ops[0x092, 0x0] = "LOCK_UNLOCK_CACHE(16)_or_LOCATE(16)"; + scsi_ops[0x093, 0x0] = "WRITE_SAME(16)_or_ERASE(16)"; + scsi_ops[0x09e, 0x0] = "SERVICE_IN_or_READ_CAPACITY(16)"; + scsi_ops[0x0a0, 0x0] = "REPORT_LUNS"; + scsi_ops[0x0a3, 0x0] = "MAINTENANCE_IN_or_REPORT_TARGET_PORT_GROUPS"; + scsi_ops[0x0a4, 0x0] = "MAINTENANCE_OUT_or_SET_TARGET_PORT_GROUPS"; + scsi_ops[0x0a7, 0x0] = "MOVE_MEDIUM"; + scsi_ops[0x0a8, 0x0] = "READ(12)"; + scsi_ops[0x0aa, 0x0] = "WRITE(12)"; + scsi_ops[0x0ae, 0x0] = "WRITE_AND_VERIFY(12)"; + scsi_ops[0x0af, 0x0] = "VERIFY(12)"; + scsi_ops[0x0b3, 0x0] = "SET_LIMITS(12)"; + scsi_ops[0x0b4, 0x0] = "READ_ELEMENT_STATUS"; + scsi_ops[0x0b7, 0x0] = "READ_DEFECT_DATA(12)"; + scsi_ops[0x0ba, 0x0] = "REDUNDANCY_GROUP_IN"; + scsi_ops[0x0bb, 0x0] = "REDUNDANCY_GROUP_OUT"; + scsi_ops[0x0bc, 0x0] = "SPARE_IN"; + scsi_ops[0x0bd, 0x0] = "SPARE_OUT"; + scsi_ops[0x0be, 0x0] = "VOLUME_SET_IN"; + scsi_ops[0x0bf, 0x0] = "VOLUME_SET_OUT"; + scsi_ops[0x0d0, 0x0] = "EXPLICIT_LUN_FAILOVER"; + scsi_ops[0x0f1, 0x0] = "STOREDGE_CONTROLLER"; + + scsi_reasons[0] = "COMPLETED"; + scsi_reasons[1] = "INCOMPLETE"; + scsi_reasons[2] = "DMA_ERR"; + scsi_reasons[3] = "TRAN_ERR"; + scsi_reasons[4] = "RESET"; + scsi_reasons[5] = "ABORTED"; + scsi_reasons[6] = "TIMEOUT"; + scsi_reasons[7] = "DATA_OVERRUN"; + scsi_reasons[8] = "COMMAND_OVERRUN"; + scsi_reasons[9] = "STATUS_OVERRUN"; + scsi_reasons[10] = "Bad_Message"; + scsi_reasons[11] = "No_Message_Out"; + scsi_reasons[12] = "XID_Failed"; + scsi_reasons[13] = "IDE_Failed"; + scsi_reasons[14] = "Abort_Failed"; + scsi_reasons[15] = "Reject_Failed"; + scsi_reasons[16] = "Nop_Failed"; + scsi_reasons[17] = "Message_Parity_Error_Failed"; + scsi_reasons[18] = "Bus_Device_Reset_Failed"; + scsi_reasons[19] = "Identify_Message_Rejected"; + scsi_reasons[20] = "Unexpected_Bus_free"; + scsi_reasons[21] = "Tag_Rejected"; + scsi_reasons[22] = "TERMINATED"; + scsi_reasons[24] = "Device_Gone"; + + scsi_state[0] = "Success"; + scsi_state[0x2] = "Check_Condition"; + scsi_state[0x4] = "Condition_Met"; + scsi_state[0x8] = "Busy"; + scsi_state[0x10] = "Intermidiate"; + scsi_state[0x14] = "Intermidiate-Condition_Met"; + scsi_state[0x18] = "Reservation_Conflict"; + scsi_state[0x22] = "Command_Terminated"; + scsi_state[0x28] = "Queue_Full"; + scsi_state[0x30] = "ACA_Active"; + + +/* FRAMEWORK:- add your special string to this array */ + names[0] = "->"; /* Command sent */ + names[1] = "<-"; /* Command returning */ + names[2] = "ILL"; /* Illegal Request spotted */ +} + +fbt:scsi:scsi_transport:entry, +fbt:scsi:scsi_destroy_pkt:entry +/ timeout != 0 && end_time < timestamp / +{ + exit(0); +} + +/* + * relying on scsi_destroy_pkt to get the response is a + * hack that may or may not always work depending on HBA + * and target drivers. + * With arc case: PSARC/2009/033 I can use scsi_hba_pkt_comp(). When that + * is out in the wild it should be used. + */ +/* + * If in any of these probes this->pkt is initialized to zero + * the knock on effect is none of the other probes for this clause further + * down get run. + * + * Do not just comment the probes out. Clause local variables are not + * initialized by default so you will see run time dtrace errors if you do + * where other variables do not get set. + */ + +#define PROBE_SWITCH \ + ((target == 0 || pid == target ) && \ + (scsi_d_target_execname == "" || scsi_d_target_execname == execname)) + +fbt:scsi:scsi_transport:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:scsi:scsi_transport:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +fbt:scsi:scsi_destroy_pkt:entry +{ + this->pkt = (struct scsi_pkt *)arg0; +} + +/* + * FRAMEWORK:- create your entry probe and make this->pkt point to the + * argument that has the scsi_pkt. eg add one like this: + * + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ !PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)0; +} +fbt:*sd:*sd_sense_key_illegal_request:entry +/ PROBE_SWITCH / +{ + this->pkt = (struct scsi_pkt *)arg3; +} + +/* FRAMEWORK:- Add your probe name to the list for CDB_PROBES */ +#define CDB_PROBES \ +fbt:scsi:scsi_transport:entry, \ +fbt:*sd:*sd_sense_key_illegal_request:entry + +#define ENTRY_PROBES \ +CDB_PROBES, \ +fbt:scsi:scsi_destroy_pkt:entry + +ENTRY_PROBES +/ this->pkt / +{ + this->cdb = (uchar_t *)this->pkt->pkt_cdbp; + this->scb = (uchar_t *)this->pkt->pkt_scbp; + this->group = ((this->cdb[0] & 0xe0) >> 5); + this->lbalen = 0; +} +/* + * 5.11 allows this->name = "->" but vanilla 5.10 does not so I'm sticking + * to the setting a variable and then choosing the string from the global + * array. + */ +fbt:scsi:scsi_transport:entry +/ this->pkt / +{ + start_time[this->scb] = timestamp; + this->name = 0; +} + +/* + * If we are filtering on target pid or execname then only report + * the return packets that we know we sent. + */ +fbt:scsi:scsi_destroy_pkt:entry +/ start_time[this->scb] == 0 && ( target != 0 || scsi_d_target_execname != "") / +{ + this->pkt = 0; +} + +fbt:scsi:scsi_destroy_pkt:entry +/ this->pkt / +{ + this->name = 1; +} +/* + * FRAMEWORK: Add your probe and set this->name to the offset of + * your string in the names table. + */ +fbt:*sd:*sd_sense_key_illegal_request:entry +/ this->pkt / +{ + this->name = 2; +} + +/* + * Now cope with the differnent CDB layouts based on the group. + * + * Trying to use x = this->group == 0 ? y : z; results in D running out + * of registers so it is all done using seperate probes. + * + * Group Listing: + * + Group 0: 6Byte CDBs: scsi_ops[0x000] thru scsi_ops[0x01f] + * + Group 1: 10Byte CDBs: scsi_ops[0x020] thru scsi_ops[0x03f] + * + Group 2: 10Byte CDBs: scsi_ops[0x040] thru scsi_ops[0x05f] + * + Group 3: Variable Length CDBs: scsi_ops[0x060] thru scsi_ops[0x07f] + * + Group 4: 16Byte CDBs: scsi_ops[0x080] thru scsi_ops[0x09f] + * + Group 5: 12Byte CDBs: scsi_ops[0x0a0] thru scsi_ops[0x0bf] + * Group 6: Vendor Specific CDBs: scsi_ops[0x0c0] thru scsi_ops[0x0df] + * Group 7: Vendor Specific CDBs: scsi_ops[0x0e0] thru scsi_ops[0x0ff] + * + * The groups with a leading plus sign "+" are of importance. + */ +ENTRY_PROBES +/ this->pkt && this->group == 0 / +{ + this->lba = INT32(0, (this->cdb[1] & 0x1f), + this->cdb[2], this->cdb[3]); + this->lbalen = 6; + this->len = INT8(this->cdb[4]); + this->control = this->cdb[5]; + this->sa = 0; + this->cdblen = 6; +} + +ENTRY_PROBES +/ this->pkt && this->group == 1 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 2 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT16(this->cdb[7], this->cdb[8]); + this->control = this->cdb[9]; + this->sa = 0; + this->cdblen = 10; +} + +ENTRY_PROBES +/ this->pkt && this->group == 3 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[12], this->cdb[13], + this->cdb[14], this->cdb[15]); + this->lbaLower = INT32(this->cdb[16], this->cdb[17], + this->cdb[18], this->cdb[19]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[28], this->cdb[29], + this->cdb[30], this->cdb[31]); + this->control = this->cdb[1]; + this->sa = INT16(this->cdb[8], this->cdb[9]); + this->cdblen = 32 +} + +ENTRY_PROBES +/ this->pkt && this->group == 4 / +{ + /* + * This is to get around insufficient DTrace Registers... + * + * Instead of doing this in a single step like: + * + * this->lba = INT64(\ + * this->cdb[2], this->cdb[3], this->cdb[4], this->cdb[5], \ + * this->cdb[6], this->cdb[7], this->cdb[8], this->cdb[9]); + * + * The int64 LBA value must be calculated in 5 steps... + */ + this->lbaUpper = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbaLower = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->lba = INT64(this->lbaUpper, this->lbaLower); + this->lbaUpper = 0; + this->lbaLower = 0; + + this->lbalen = 16; + this->len = INT32(this->cdb[10], this->cdb[11], + this->cdb[12], this->cdb[13]); + this->control = this->cdb[15]; + this->sa = 0; + this->cdblen = 16; +} + +ENTRY_PROBES +/ this->pkt && this->group == 5 / +{ + this->lba = INT32(this->cdb[2], this->cdb[3], + this->cdb[4], this->cdb[5]); + this->lbalen = 8; + this->len = INT32(this->cdb[6], this->cdb[7], + this->cdb[8], this->cdb[9]); + this->control = this->cdb[11]; + this->sa = 0; + this->cdblen = 12; +} +/* + * We don't know the format of the group 6 and 7 commands as they are + * vendor specific. So I just print the cdb up to a maximum of 32 bytes. + */ +ENTRY_PROBES +/ this->pkt && this->group > 5 / +{ + this->lba = 0; + this->lbalen = 0; + this->len = 0; + this->control = 0; + this->sa = 0; +/* + * At some point pkt_cdblen will make it into 5.10 but I can't at this moment + * workout how I can test this before the script compiles (without wrapping + * this in a ksh script which as I have mentioned above I'm not going to do). + */ +#ifndef __SunOS_5_10 + this->cdblen = this->pkt->pkt_cdblen; +#else + this->cdblen = 32; +#endif + +} + +/* + * The guts of the script. Report what we have if we are required. + * First the stuff we do for both outgoing and incoming. + */ + +ENTRY_PROBES +/ !ARGUMENT_TEST / +{ + this->arg_test_passed = 0; +} + +ENTRY_PROBES +/ ARGUMENT_TEST / +{ + this->arg_test_passed = 1; +} + +/* + * If only reporting IO that takes more then a certan time then use + * speculations to capture the outgoing data and then only commit the + * speculation if the packet takes too long. + */ + +#ifdef REPORT_OVERTIME + +#define SPECULATE speculate(specs[this->scb]); + +ENTRY_PROBES +/ this->arg_test_passed && specs[this->scb] == 0 / +{ + specs[this->scb] = speculation(); +} + +#else + +#define SPECULATE + +#endif /* REPORT_OVERTIME */ + +#ifndef QUIET +ENTRY_PROBES +/ this->arg_test_passed / +{ + SPECULATE + PRINT_TIMESTAMP(); + PRINT_DEV_FROM_PKT(this->pkt); + printf("%s 0x%2.2x %9s address %2.2d:%2.2d, lba 0x%*.*x, ", + names[this->name], + this->cdb[0], + scsi_ops[(uint_t)this->cdb[0], this->sa] != 0 ? + scsi_ops[(uint_t)this->cdb[0], this->sa] : "Unknown", + this->pkt->pkt_address.my_atarget, + this->pkt->pkt_address.my_alun, + this->lbalen, this->lbalen, + this->lba); + printf("len 0x%6.6x, control 0x%2.2x timeout %d CDBP %p", + this->len, this->control, + this->pkt->pkt_time, this->cdb); +} + +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(" %d %s(%d)", this->arg_test_passed, execname, pid); +} + +/* + * For those who are just not happy without some raw hex numbers. Print the + * raw cdb. + */ +CDB_PROBES +/ this->arg_test_passed == 1 && this->cdblen / +{ + SPECULATE + printf(" cdb(%d) ", this->cdblen); +} + +/* + * Now print each byte out of the cdb. + */ + +#define PRINT_CDB(N) \ +CDB_PROBES \ +/ this->arg_test_passed == 1 && this->cdblen > N/ \ +{ \ + SPECULATE; \ + printf("%2.2x", this->cdb[N]) \ +} +#else /* QUIET */ +#define PRINT_CDB(N) +#endif /* QUIET */ + +PRINT_CDB(0) +PRINT_CDB(1) +PRINT_CDB(2) +PRINT_CDB(3) +PRINT_CDB(4) +PRINT_CDB(5) +PRINT_CDB(6) +PRINT_CDB(7) +PRINT_CDB(8) +PRINT_CDB(9) +PRINT_CDB(10) +PRINT_CDB(11) +PRINT_CDB(12) +PRINT_CDB(13) +PRINT_CDB(14) +PRINT_CDB(15) +PRINT_CDB(16) +PRINT_CDB(17) +PRINT_CDB(18) +PRINT_CDB(19) +PRINT_CDB(20) +PRINT_CDB(21) +PRINT_CDB(22) +PRINT_CDB(23) +PRINT_CDB(24) +PRINT_CDB(25) +PRINT_CDB(26) +PRINT_CDB(27) +PRINT_CDB(28) +PRINT_CDB(29) +PRINT_CDB(30) +PRINT_CDB(31) + +/* + * Now the result on the incoming. + */ +#ifdef PERF_REPORT +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && start_time[this->scb] != 0 / +{ + @[DEV_NAME(this->pkt), DEV_INST(this->pkt) +#ifdef TARGET_STATS + , this->pkt->pkt_address.a_target +#ifdef LUN_STATS + , this->pkt->pkt_address.a_lun +#endif +#endif + ] = quantize(timestamp - start_time[this->scb]); +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + this->state = *(this->scb); + this->state = this->state & 0x3E; +} +#ifndef QUIET +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf(", reason 0x%x (%s) pkt_state 0x%x state 0x%x %s Time %dus\n", + this->pkt->pkt_reason, + scsi_reasons[this->pkt->pkt_reason] != 0 ? + scsi_reasons[this->pkt->pkt_reason] : "Unknown", + this->pkt->pkt_state, this->state, scsi_state[this->state] != 0 ? + scsi_state[this->state] : "Unknown", + start_time[this->scb] != 0 ? + (timestamp - start_time[this->scb])/1000 : 0); +} +#endif + +#ifndef QUIET +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + SPECULATE + printf("\n"); +} +#endif +/* + * printing stacks of where we are called from can be useful however it + * does mean you can't just pipe the output through sort(1). Does not work + * with overtime as you can't put a stack into a speculation. + */ +#ifdef PRINT_STACK +CDB_PROBES +/ this->arg_test_passed == 1 / +{ + stack(10); +} +#endif + +#ifdef REPORT_OVERTIME +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +((timestamp - start_time[this->scb]) > REPORT_OVERTIME && start_time[this->scb]) / +{ + SPECULATE + commit(specs[this->scb]); + specs[this->scb] = 0; +} +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 && +! ((timestamp - start_time[this->scb]) > REPORT_OVERTIME) / +{ + discard(specs[this->scb]); + specs[this->scb] = 0; +} +#endif + +fbt:scsi:scsi_destroy_pkt:entry +/ this->arg_test_passed == 1 / +{ + start_time[this->scb] = 0; +} + diff --git a/scsilatency.d b/scsilatency.d new file mode 100644 index 0000000..8e95073 --- /dev/null +++ b/scsilatency.d @@ -0,0 +1,58 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +string scsi_cmd[uchar_t]; + +dtrace:::BEGIN +{ + /* See /usr/include/sys/scsi/generic/commands.h for the full list. */ + scsi_cmd[0x00] = "test_unit_ready"; + scsi_cmd[0x08] = "read"; + scsi_cmd[0x0a] = "write"; + scsi_cmd[0x12] = "inquiry"; + scsi_cmd[0x17] = "release"; + scsi_cmd[0x1a] = "mode_sense"; + scsi_cmd[0x1b] = "load/start/stop"; + scsi_cmd[0x1c] = "get_diagnostic_results"; + scsi_cmd[0x1d] = "send_diagnostic_command"; + scsi_cmd[0x25] = "read_capacity"; + scsi_cmd[0x28] = "read(10)"; + scsi_cmd[0x2a] = "write(10)"; + scsi_cmd[0x35] = "synchronize_cache"; + scsi_cmd[0x4d] = "log_sense"; + scsi_cmd[0x5e] = "persistent_reserve_in"; + scsi_cmd[0xa0] = "report_luns"; + + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::scsi_transport:entry +{ + start[arg0] = timestamp; +} + +fbt::scsi_destroy_pkt:entry +/start[arg0]/ +{ + this->delta = (timestamp - start[arg0]) / 1000; + this->code = *args[0]->pkt_cdbp; + this->cmd = scsi_cmd[this->code] != NULL ? + scsi_cmd[this->code] : lltostr(this->code); + this->reason = args[0]->pkt_reason == 0 ? "Success" : + strjoin("Fail:", lltostr(args[0]->pkt_reason)); + + @num[this->cmd, this->reason] = count(); + @average[this->cmd, this->reason] = avg(this->delta); + @total[this->cmd, this->reason] = sum(this->delta); + + start[arg0] = 0; +} + +dtrace:::END +{ + normalize(@total, 1000); + printf("\n %-26s %-12s %11s %11s %11s\n", "SCSI COMMAND", + "COMPLETION", "COUNT", "AVG(us)", "TOTAL(ms)"); + printa(" %-26s %-12s %@11d %@11d %@11d\n", @num, @average, @total); +} \ No newline at end of file diff --git a/sdretry.d b/sdretry.d new file mode 100644 index 0000000..14e812a --- /dev/null +++ b/sdretry.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... output every 10 seconds.\n"); +} + +fbt::sd_set_retry_bp:entry +{ + @[xlate (args[1])->dev_statname, + xlate (args[1])->dev_major, + xlate (args[1])->dev_minor] = count(); +} + +tick-10sec +{ + printf("\n%Y:\n", walltimestamp); + printf("%28s %-3s,%-4s %s\n", "DEVICE", "MAJ", "MIN", "RETRIES"); + printa("%28s %-03d,%-4d %@d\n", @); + trunc(@); +} diff --git a/sdretry1.d b/sdretry1.d new file mode 100644 index 0000000..24fc531 --- /dev/null +++ b/sdretry1.d @@ -0,0 +1,23 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ +printf("Tracing... output every 10 seconds.\n"); +} + +fbt::sd_set_retry_bp:entry +{ +@[xlate (args[1])->dev_statname, +xlate (args[1])->dev_major, +xlate (args[1])->dev_minor] = count(); +} + +tick-10sec +{ +printf("\n%Y:\n", walltimestamp); +printf("%28s %-3s,%-4s %s\n", "DEVICE", "MAJ", "MIN", "RETRIES"); +printa("%28s %-03d,%-4d %@d\n", @); +trunc(@); +} \ No newline at end of file diff --git a/spa_sync.d b/spa_sync.d new file mode 100644 index 0000000..a3f00c3 --- /dev/null +++ b/spa_sync.d @@ -0,0 +1,48 @@ +#!/usr/sbin/dtrace -qs + +/* + * spa_sync.d - ROCH http://blogs.sun.com/roch/entry/128k_suffice + * mods by benr + * + * Measure I/O throughput as generated by spa_sync + * Between the spa_sync entry and return probe + * I count all I/O and bytes going through bdev_strategy. + * This is a lower bound on what the device can do since + * some aspects of spa_sync are non-concurrent I/Os. + */ + +BEGIN { + tt = 0; /* timestamp */ + b = 0; /* Bytecount */ + cnt = 0; /* iocount */ +} + +spa_sync:entry/(self->t == 0) && (tt == 0)/{ + b = 0; /* reset the I/O byte count */ + cnt = 0; + tt = timestamp; + self->t = 1; + printf("%Y", walltimestamp); +} + +spa_sync:return +/(self->t == 1) && (tt != 0)/ +{ + this->delta = (timestamp-tt); + this->cnt = (cnt == 0) ? 1 : cnt; /* avoid divide by 0 */ + printf("t: %d MB; %d ms of spa_sync; avg sz : %d KB; throughput %d MB/sn", + b / 1048576, + this->delta / 1000000, + b / this->cnt / 1024, + (b * 1000000000) / (this->delta * 1048676)); + tt = 0; + self->t = 0; +} + +/* We only count I/O issued during an spa_sync */ +bdev_strategy:entry +/tt != 0/ +{ + cnt ++; + b += (args[0]->b_bcount); +} \ No newline at end of file diff --git a/spa_sync__.d b/spa_sync__.d new file mode 100644 index 0000000..55ada38 --- /dev/null +++ b/spa_sync__.d @@ -0,0 +1,41 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +inline int MIN_MS = 1; + +dtrace:::BEGIN +{ + printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); + @bytes = sum(0); +} + +fbt::spa_sync:entry +/!self->start/ +{ + in_spa_sync = 1; + self->start = timestamp; + self->spa = args[0]; +} + +io:::start +/in_spa_sync/ +{ + @io = count(); + @bytes = sum(args[0]->b_bcount); +} + +fbt::spa_sync:return +/self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ +{ + normalize(@bytes, 1048576); + printf("%-20Y %-10s %6d ms, ", walltimestamp, + stringof(self->spa->spa_name), this->ms); + printa("%@d MB %@d I/O\n", @bytes, @io); +} + +fbt::spa_sync:return +{ + self->start = 0; self->spa = 0; in_spa_sync = 0; + clear(@bytes); clear(@io); +} diff --git a/spacemapalloc_lat.d b/spacemapalloc_lat.d new file mode 100644 index 0000000..3b5516d --- /dev/null +++ b/spacemapalloc_lat.d @@ -0,0 +1,24 @@ +#!/usr/sbin/dtrace -s + +fbt::space_map_alloc:entry +{ + self->s = arg1; +} + +fbt::space_map_alloc:return +/arg1 != -1/ +{ + self->s = 0; +} + +fbt::space_map_alloc:return +/self->s && (arg1 == -1)/ +{ + @s = quantize(self->s); + self->s = 0; +} + +tick-10s +{ + printa(@s); +} \ No newline at end of file diff --git a/spasync.d b/spasync.d new file mode 100644 index 0000000..4b1236e --- /dev/null +++ b/spasync.d @@ -0,0 +1,52 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=8m + +inline int MIN_MS = 1000; + +dtrace:::BEGIN +{ + printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); + /* @bytes = sum(0); */ +} + +fbt::spa_sync:entry +/!self->start/ +{ + in_spa_sync = 1; + self->start = timestamp; + self->spa = args[0]; + _count = 0; + _bytes = 0; +} + +io:::start +/in_spa_sync/ +{ + /* @io = count(); + @bytes = sum(args[0]->b_bcount); */ + @avg_bytesize = avg(args[0]->b_bcount); + _bytes += (args[0]->b_bcount); + _count ++; +} + +fbt::spa_sync:return +/ in_spa_sync && self->spa && self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ +{ + this->count = (_count == 0) ? 1 : _count; /* We need to avoid division by zero */ + /* normalize(@bytes, 1048576); */ + this->mbytes = (_bytes == 0) ? 1 : _bytes / 1048576; + this->avg_iosize = _bytes / this->count; + printf("%-20Y %-10s %6d ms, ", walltimestamp, + stringof(self->spa->spa_name), this->ms); + /* printa("%@d MB %@d I/O\n", @bytes, @io); */ + printf("Bytes(b): %d, Megabytes(MB): %d, IOP(s): %d, Avg. Size: %d\n", _bytes, this->mbytes, _count, this->avg_iosize); +} + +fbt::spa_sync:return +/self->spa/ +{ + self->start = 0; self->spa = 0; in_spa_sync = 0; _bytes = 0; + /* clear(@bytes); clear(@io); */ +} \ No newline at end of file diff --git a/tcp_rtt.d b/tcp_rtt.d new file mode 100644 index 0000000..795c559 --- /dev/null +++ b/tcp_rtt.d @@ -0,0 +1,33 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +tcp:::send +{ + snxt[args[1]->cs_cid, args[3]->tcps_snxt] = timestamp; + bytesrttttmp[args[1]->cs_cid, args[3]->tcps_snxt] = + args[2]->ip_plength - args[4]->tcp_offset; +} + +tcp:::receive +/ snxt[args[1]->cs_cid, args[4]->tcp_ack] / +{ + @bytesrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + sum(bytesrttttmp[args[1]->cs_cid, args[4]->tcp_ack]); + @meanrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + avg(timestamp - snxt[args[1]->cs_cid, args[4]->tcp_ack]); + @stddevrtt[args[2]->ip_saddr, args[4]->tcp_sport] = + stddev(timestamp - snxt[args[1]->cs_cid, args[4]->tcp_ack]); + @countrtt[args[2]->ip_saddr, args[4]->tcp_sport] = count(); + snxt[args[1]->cs_cid, args[4]->tcp_ack] = 0; + bytesrttttmp[args[1]->cs_cid, args[4]->tcp_ack] = 0; +} + +END +{ + printf("%-20s %-8s %-15s %-15s %-10s %-7s\n", + "Remote host", "Port", "TCP Avg RTT(ns)", "StdDev", "NumBytes", + "NumSegs"); + printa("%-20s %-8d %@-15d %@-15d %@-10d %@-7d\n", @meanrtt, + @stddevrtt, @bytesrtt, @countrtt); +} diff --git a/trace_arc.d b/trace_arc.d new file mode 100644 index 0000000..21143ab --- /dev/null +++ b/trace_arc.d @@ -0,0 +1,240 @@ +#!/usr/sbin/dtrace -s +#pragma D option flowindent + +fbt:zfs:arc_read:entry +/pid == $target/ +{ + self->ts = timestamp; + self->interested = 1; +} + +zfs:: +/self->interested/ +{ +} + +buf_hash_find:entry +/self->interested/ +{ + printf("Spa: %d, DVA %d.%d, birth: %d", args[0], args[1]->dva_word[0], args[1]->dva_word[1], args[2]); +} + +buf_hash_insert:entry +/self->interested/ +{ + /* ARC_IN_HASH_TABLE flag might be set here */ +} + +arc_buf_alloc:entry +/self->interested/ +{ + printf("b_datacnt may be incremented here"); +} + +arc_buf_clone:entry +/self->interested/ +{ + printf("b_datacnt may be incremented here"); +} + +buf_hash_insert:return +/self->interested/ +{ + +} + +arc_released:return +/self->interested/ +{ + trace(args[1]); +} + +dbuf_read_done:entry +/self->interested/ +{ + trace(args[0]->io_error); +} + +dbuf_read_impl:entry +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + +dbuf_read_impl:return +/self->interested/ +{ + +} + +dmu_objset_open_impl:entry +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + +dmu_objset_open_impl:return +/self->interested/ +{ + +} + +arc_read_nolock:entry +/self->interested/ +{ + printf("IMPORTANT - next, figure out who gives us these arc_flags and why? \n"); + printf("arc_flags: %d - ", *args[7]); + printf("%s", (*args[7] & (1 << 1)) ? "ARC_WAIT " : ""); + printf("%s", (*args[7] & (1 << 2)) ? "ARC_NOWAIT " : ""); + printf("%s", (*args[7] & (1 << 3)) ? "ARC_PREFETCH " : ""); + printf("%s", (*args[7] & (1 << 4)) ? "ARC_CACHED " : ""); + printf("%s", (*args[7] & (1 << 5)) ? "ARC_L2CACHE " : ""); + /* ARC_L2CACHE flag might be set here in 3 different spots */ + +} + +arc_read_nolock:return +/self->interested/ +{ + +} + +arc_write:entry +/self->interested/ +{ + +} + +arc_write:return +/self->interested/ +{ + /* ARC_L2CACHE flag might be set here */ +} + + +arc_get_data_buf:entry +/self->interested/ +{ + printf("When called from within arc_read_nolock, b_datacnt is increased right before calling here. See arc.c line 2827 \n"); + printf("Buf state: %a, Buf size: %d, Buf type: %s", args[0]->b_hdr->b_state, args[0]->b_hdr->b_size, args[0]->b_hdr->b_type == 0 ? "ARC_BUFC_DATA" : "ARC_BUFC_METADATA"); +} + +arc_adapt:entry +/self->interested/ +{ + printf("State: %a, Size: %d", args[1], args[0]); +} + +arc_reclaim_needed:return +/self->interested/ +{ + printf("%s", args[1] == 0 ? "false" : "true"); +} + +arc_change_state:entry +/self->interested/ +{ + printf("New state: %a, Old state: %a, Refcnt: %d, Datacnt: %d, B_size: %d", args[0], args[1]->b_state, args[1]->b_refcnt.rc_count, args[1]->b_datacnt, args[1]->b_size); +} + +zio_buf_alloc:entry +/self->interested/ +{ + trace(args[0]); +} + +arc_space_consume:entry +/self->interested/ +{ + printf("Consuming %d bytes of arc_space_type %s", args[0], args[1] == 0 ? "DATA" : "HDRS, L2HDRS or OTHER"); +} + +dbuf_update_data:entry +/self->interested/ +{ + trace(args[0]->db_level); + trace(args[0]->db_user_data_ptr_ptr); +} + +buf_hash_find:return +/self->interested/ +{ + printf("Datacnt: %d, Hdr_flags: %d", args[1]->b_datacnt, args[1]->b_flags); +} + +arc_access:entry +/self->interested/ +{ + printf("Datacnt: %d,",args[0]->b_datacnt); + printf(" Hdr_flags: %d = ", args[0]->b_flags); + printf("%s", (args[0]->b_flags & (1 << 1)) ? "ARC_WAIT " : ""); + printf("%s", (args[0]->b_flags & (1 << 2)) ? "ARC_NOWAIT " : ""); + printf("%s", (args[0]->b_flags & (1 << 3)) ? "ARC_PREFETCH " : ""); + printf("%s", (args[0]->b_flags & (1 << 4)) ? "ARC_CACHED " : ""); + printf("%s", (args[0]->b_flags & (1 << 5)) ? "ARC_L2CACHE " : ""); + printf("%s", (args[0]->b_flags & (1 << 6)) ? "WTF_6 " : ""); + printf("%s", (args[0]->b_flags & (1 << 7)) ? "WTF_7 " : ""); + printf("%s", (args[0]->b_flags & (1 << 8)) ? "WTF_8 " : ""); + printf("%s", (args[0]->b_flags & (1 << 9)) ? "ARC_IN_HASH_TABLE " : ""); + printf("%s", (args[0]->b_flags & (1 << 10)) ? "ARC_IO_IN_PROGRESS " : ""); + printf("%s", (args[0]->b_flags & (1 << 11)) ? "ARC_IO_ERROR " : ""); + printf("%s", (args[0]->b_flags & (1 << 12)) ? "ARC_FREED_IN_READ " : ""); + printf("%s", (args[0]->b_flags & (1 << 13)) ? "ARC_BUF_AVAILABLE " : ""); + printf("%s", (args[0]->b_flags & (1 << 14)) ? "ARC_INDIRECT " : ""); + printf("%s", (args[0]->b_flags & (1 << 15)) ? "ARC_FREE_IN_PROGRESS " : ""); + printf("%s", (args[0]->b_flags & (1 << 16)) ? "ARC_L2_EVICTING " : ""); + printf("%s", (args[0]->b_flags & (1 << 17)) ? "ARC_L2_EVICTED " : ""); + printf("%s", (args[0]->b_flags & (1 << 18)) ? "ARC_L2_WRITE_HEAD " : ""); + + +} +add_reference:entry +/self->interested/ +{ + trace(args[0]->b_datacnt); + trace(args[0]->b_flags); +} + +fbt:zfs:arc_read:return +/pid == $target/ +{ + printf("Time spent in arc_read: %d ns", timestamp - self->ts); + self->iinterested = 0; + self->ts = NULL; +} + +fbt::arc_evict:entry +/self->interested/ +{ + printf("%Y %-10a %-10s %-10s %d bytes\n", walltimestamp, args[0], + arg4 == 0 ? "data" : "metadata", + arg3 == 0 ? "evict" : "recycle", arg2); + + /* ARC_IN_HASH_TABLE flag might be set here */ + +} + +fbt::arc_buf_evict:entry +/self->interested/ +{ + + /* ARC_IN_HASH_TABLE flag might be set here */ + +} + +fbt:zfs:arc_evict_needed:return +/self->interested/ +{ + printf("%s", args[1] == 0 ? "false" : "true"); +} + +fbt:zfs:buf_hash_remove:entry +/self->interested/ +{ + printf("Removing buffer hash of %a", args[0]->b_state); +} + +fbt:zfs:buf_hash_remove:return +/self->interested/ +{ +} diff --git a/trussrw.d b/trussrw.d new file mode 100644 index 0000000..0b507bd --- /dev/null +++ b/trussrw.d @@ -0,0 +1,31 @@ +/* +* Run script with dtrace -qs +*/ +dtrace:::BEGIN + { + i = 20; + } + + +syscall::read:entry, +syscall::write:entry +/i > 0, pid == $1/ +{ + trace(i--); + printf("\n[%s] , %s, %s(%d, 0x%x, %4d)", execname, probename, probefunc, arg0, arg1, arg2); +} + +syscall::read:return, +syscall::write:return +/pid == $1/ +{ + printf("\t\t = %d\n", arg1); +} + +syscall::read:entry, +syscall::write:entry +/i == 0/ +{ + trace("blastoff!"); + exit(0) +} \ No newline at end of file diff --git a/txg_monitor.d b/txg_monitor.d index 1afe9de..4947f00 100644 --- a/txg_monitor.d +++ b/txg_monitor.d @@ -62,4 +62,4 @@ fbt::spa_sync:return { self->start = 0; self->spa = 0; in_spa_sync = 0; clear(@bytes); clear(@io); clear(@delays); clear(@throttles); trunc(@wrl_min); trunc(@wrl_max); trunc(@reserved_max); -} +} \ No newline at end of file diff --git a/uberupd.d b/uberupd.d new file mode 100644 index 0000000..f22bccd --- /dev/null +++ b/uberupd.d @@ -0,0 +1,38 @@ +#!/usr/sbin/dtrace -s + +/* #pragma D option quiet */ + +/* This section may be of questionable value, but +could be used to determine which block numbers correlate with +which uberblock update events +*/ +io:genunix:default_physio:start, +io:genunix:bdev_strategy:start, +io:genunix:biodone:done +{ + printf ("%d %s Blk Num: %d Byte Count: %d", timestamp, execname, + args[0]->b_blkno, args[0]->b_bcount); +} + +fbt:zfs:uberblock_update:entry +{ + printf ("%d %s, %d, %d, last_sync_TXG: %d, birth_TXG: %d, %d, curr_TXG: %d", timestamp, execname, + pid, args[0]->ub_rootbp.blk_prop, args[0]->ub_txg, + args[0]->ub_rootbp.blk_birth, args[1]->vdev_asize, args[2]); +} + +/* +Watch carefully for the `:return` probe, last arg. of which is a bool., +indicating whether or not uberblock was updated during last TXG. +If the birth_TXG and the curr_TXG match, we know uberblock was updated, +and sure enough the `:return` probe should have its last argument be '1'. + +Example (pay attn. to birth_TXG: 6147837, and curr_TXG: 6147837): + 0 52274 uberblock_update:entry 5857149610527 sched, 0, 9226475966770118659, last_sync_TXG: 6147836, birth_TXG: 6147837, 0, curr_TXG: 6147837 + 0 52275 uberblock_update:return 5857149619433 sched, 0, 1 +*/ +fbt:zfs:uberblock_update:return +{ + printf ("%d %s, %d, %d", timestamp, execname, + pid, args[1]); +} diff --git a/watch-arc-shrink.d b/watch-arc-shrink.d new file mode 100644 index 0000000..dbb33e4 --- /dev/null +++ b/watch-arc-shrink.d @@ -0,0 +1,13 @@ +#!/usr/sbin/dtrace -s + +fbt:zfs:arc_reclaim_needed:return +/args[1]/ +{ + printf("%Y return=%d\n", walltimestamp, args[1]); +} + +fbt:zfs:arc_shrink:entry +{ + printf("%Y\n", walltimestamp); +} + diff --git a/zfs_perds_io_latency.d b/zfs_perds_io_latency.d new file mode 100644 index 0000000..e44341e --- /dev/null +++ b/zfs_perds_io_latency.d @@ -0,0 +1,57 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet + +BEGIN { + + x = 0; + y = 0; + watermark = 100; /* We want to track all IO slower than 50ms. */ + start_d = timestamp; + to_millisec = 1000000; + to_microsec = 1000; + div = "--------------------------------------------------------------------------------"; +} + +dmu_buf_hold_array_by_dnode:entry +/args[0]->dn_objset->os_dsl_dataset && args[3]/ /* Reads */ + +{ + this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); + this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); + /* this->path = strjoin(strjoin(this->parent,"/"),this->ds); */ /* Dirty hack - parent/this format doesn't guarantee full path */ + self->path = strjoin(strjoin(this->parent,"/"),this->ds); + self->uniq = args[0]->dn_objset->os_dsl_dataset->ds_fsid_guid; + self->reads = 1; + ts[ self->path, self->reads, self->uniq ] = timestamp; + +} + +dmu_buf_hold_array_by_dnode:entry +/args[0]->dn_objset->os_dsl_dataset && !args[3]/ /* Writes */ + +{ + this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); + this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); + /* this->path = strjoin(strjoin(this->parent,"/"),this->ds); */ /* Dirty hack - parent/this format doesn't guarantee full path */ + self->path = strjoin(strjoin(this->parent,"/"),this->ds); + self->uniq = args[0]->dn_objset->os_dsl_dataset->ds_fsid_guid; + self->writes = 1; + ts[ self->path, self->writes, self->uniq ] = timestamp; + +} + +dmu_buf_hold_array_by_dnode:return +/ts[ self->path, self->reads, self->uniq ] && args[1] == 0/ +{ + x = (timestamp - ts[ self->path, self->reads, self->uniq ]) / to_microsec; + @["Reads (us)", self->path] = lquantize(x,0,100,10); + self->reads = 0; +} + +dmu_buf_hold_array_by_dnode:return +/ts[ self->path, self->writes, self->uniq ] && args[1] == 0/ +{ + x = (timestamp - ts[ self->path, self->writes, self->uniq ]) / to_microsec; + @["Writes (us)", self->path] = lquantize(x,0,100,10); + self->writes = 0; +} diff --git a/zfs_vdev_iostart.d b/zfs_vdev_iostart.d new file mode 100644 index 0000000..62179b1 --- /dev/null +++ b/zfs_vdev_iostart.d @@ -0,0 +1,52 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +#pragma D option dynvarsize=4M + +BEGIN { + + x = 0; + y = 0; + watermark = 50; /* We want to track all IO slower than 50ms. */ + start_d = walltimestamp; + to_millisec = 1000000; + to_microsec = 1000; + div = "--------------------------------------------------------------------------------"; +} + +fbt:zfs:zio_vdev_io_start:entry +{ + self->io_offset = args[0]->io_offset; + self->io_txg = args[0]->io_txg; + ts[ self->io_offset, self->io_txg ] = timestamp; + self->vtime = vtimestamp; +} + +fbt:zfs:zio_vdev_io_done:entry +/* Measure Latency of I/Os tagged as ZIO_TYPE_READ */ +/self->vtime && ts[ self->io_offset, self->io_txg ] && args[0]->io_spa->spa_name != NULL && args[0]->io_type == ZIO_TYPE_READ/ +{ + this->spa_name = stringof(args[0]->io_spa->spa_name); + this->x = (timestamp - ts[ self->io_offset, self->io_txg ]) / to_microsec; + this->y = (vtimestamp - self->vtime) / to_microsec; + @Realt["Time (us)", this->spa_name, "READ"] = lquantize(this->x,10,100,10); + @CPUt["Time onCPU (us)", this->spa_name, "READ"] = lquantize(this->y,0,50,10); + @rlat["Avg. Time (us)","READ"] = avg(this->x); + @roncpu["Avg. Time onCPU (us)","READ"] = avg(this->y); + ts[ self->io_offset, self->io_txg ] = 0; + self->vtime = 0; +} + +fbt:zfs:zio_vdev_io_done:entry +/* Measure Latency of I/Os tagged as ZIO_TYPE_WRITE */ +/self->vtime && ts[ self->io_offset, self->io_txg ] && args[0]->io_spa->spa_name != NULL && args[0]->io_type == ZIO_TYPE_WRITE/ +{ + this->spa_name = stringof(args[0]->io_spa->spa_name); + this->x = (timestamp - ts[ self->io_offset, self->io_txg ]) / to_microsec; + this->y = (vtimestamp - self->vtime) / to_microsec; + @Realt["Time (us)", this->spa_name, "WRITE"] = lquantize(this->x,10,100,10); + @CPUt["Time onCPU (us)", this->spa_name, "WRITE"] = lquantize(this->y,0,50,10); + @wlat["Avg. Time (us)","WRITE"] = avg(this->x); + @woncpu["Avg. Time onCPU (us)","WRITE"] = avg(this->y); + ts[ self->io_offset, self->io_txg ] = 0; + self->vtime = 0; +} diff --git a/zfs_write_throttle.d b/zfs_write_throttle.d new file mode 100644 index 0000000..8c902d3 --- /dev/null +++ b/zfs_write_throttle.d @@ -0,0 +1,103 @@ +#!/usr/sbin/dtrace -s + +/* + * COPYRIGHT: Copyright (c) 2008 Thomas Bastian. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy at http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * CDDL HEADER END + * + * Version: 20080728 + */ + +#pragma D option quiet + +fbt:zfs:spa_sync:entry +{ + self->t = timestamp; + self->pool = args[0]->spa_dsl_pool; + self->txg = args[1]; + self->pname = stringof(self->pool->dp_root_dir->dd_myname); + + @wlimit[self->pname, "Write limit (MB)"] = + lquantize(self->pool->dp_write_limit / 1048576, 250, 8192, 250); + @scnt[self->pname] = count(); +} + +fbt:zfs:spa_sync:return +/self->t/ +{ + @stime[self->pname, "Sync time (ms)"] = + lquantize((timestamp - self->t) / 1000000, 20, 2000, 20); + @bytes[self->pname] = + sum(self->pool->dp_space_towrite[self->txg & 4] / 1048576); + @written[self->pname, "Written (MB)"] = + lquantize(self->pool->dp_space_towrite[self->txg & 4] / 1048576, + 200, 4096, 200); + + self->t = 0; + self->pool = 0; + self->txg = 0; + self->pname = 0; +} + +fbt:zfs:txg_delay:entry +{ + self->d_t = timestamp; + self->d_pname = stringof(args[0]->dp_root_dir->dd_myname); +} + +fbt:zfs:txg_delay:return +/self->d_t && ((timestamp - self->d_t) >= 10000)/ +{ + @txgdelay[self->d_pname] = count(); + self->d_t = 0; + self->d_pname = 0; +} + +fbt:zfs:txg_delay:return +/self->d_t && ((timestamp - self->d_t) < 10000)/ +{ + self->d_t = 0; + self->d_pname = 0; +} + +tick-10s +{ + printf("--- %Y\n", walltimestamp); + normalize(@scnt, 10); + printf(" %-47s %18s\n", "", "Sync rate (/s)"); + printa(" %-47s %@18d\n", @scnt); + printf("\n"); + trunc(@scnt); + + normalize(@bytes, 10); + printf(" %-47s %18s\n", "", "MB/s"); + printa(" %-47s %@18d\n", @bytes); + printf("\n"); + trunc(@bytes); + + normalize(@txgdelay, 10); + printf(" %-47s %18s\n", "", "Delays/s"); + printa(" %-47s %@18d\n", @txgdelay); + printf("\n"); + trunc(@txgdelay); + + printa(@stime); + trunc(@stime); + + printa(@written); + trunc(@written); + + printa(@wlimit); + trunc(@wlimit); +} diff --git a/zfsarcblocksize.d b/zfsarcblocksize.d new file mode 100644 index 0000000..e2744af --- /dev/null +++ b/zfsarcblocksize.d @@ -0,0 +1,47 @@ +#!/usr/sbin/dtrace -s +#pragma D option quiet +/* +* zfsarcblocksize.d - Script used for high-level observation +* of the ARC size distribution for data and metadata accesses +* quantize function is used to display a distribution plot +* of bytes and of type of data being accessed, i.e. real data +* or Metadata. It may be useful to observe the sizes of blocks +* being retrieved and the sizes of metadata reads vs. data reads +* in the ARC. +*/ + +dtrace:::BEGIN +{ + cnt = 0; +} + +sdt:zfs::arc-hit,sdt:zfs::arc-miss +/ this->b_size = ((arc_buf_hdr_t *)arg0)->b_size / +{ + @b["bytes"] = quantize(this->b_size); + ts = walltimestamp; + cnt++; +} + +sdt:zfs::arc-hit,sdt:zfs::arc-miss +/ this->b_size / +{ /* Determine if this is data or Metadata, if argument is 0, + it is data, else 1 is Metadata */ + /* @t[this->b_type = ((arc_buf_hdr_t *)arg0)->b_type == 0 ? + "Data" : "Metadata"] = quantize(((arc_buf_hdr_t *)arg0)->b_type); => + Attempted to quantize this, but it makes little sense, since there are + only two options, with regard to b_type (0,1) */ + @t[this->b_type = ((arc_buf_hdr_t *)arg0)->b_type == 0 ? + "Data" : "Metadata"] = count(); +} + +tick-1sec +/ this->b_size && cnt > 0 / +{ + printf("\n\n%Y\n", ts); + printa("\tBytes: %@d\n", @b); + printa("\tType (%s): %@d ", @t); + /* printa("\n Type(0->data, 1->metadata): %@d\n", @t); */ + clear(@b); clear(@t); + cnt = 0; /* Reset counter for next round */ +} diff --git a/zfsbiorwsize.d b/zfsbiorwsize.d new file mode 100644 index 0000000..6693a2b --- /dev/null +++ b/zfsbiorwsize.d @@ -0,0 +1,105 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + this->startzfsio = 1; + start_time[arg0] = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + this->path = args[0]->v_path; + this->size = args[1]->uio_resid; +} + + +io:::start +/ this->startzfsio / +{ + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + trig = args[0]->b_bufsize; +} + +io:::done + +/ this->startzfsio && (args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average read, bytes"] = avg(this->size); + @plots["read I/O, us"] = quantize(this->delta); + @avgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +io:::done +/ this->startzfsio && !(args[0]->b_flags & B_READ) && (this->start = start_time[arg0]) / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + args[1]->dev_statname, args[0]->b_bcount, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + this->size = args[0]->b_bcount; + @bsize["average write, bytes"] = avg(this->size); + @plots["write I/O, us"] = quantize(this->delta); + @avgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +fbt::zfs_read:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @zbsize["average read, bytes"] = avg(this->size); + @zplots["read I/O, us"] = quantize(this->delta); + @zavgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; + this->startzfsio = 0; +} + +fbt::zfs_write:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @zbsize["average write, bytes"] = avg(this->size); + @zplots["write I/O, us"] = quantize(this->delta); + @zavgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; + this->startzfsio = 0; +} + +dtrace:::END +/trig > 0/ +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); + trig = 0; + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @zavgs); + printa("\t%s %@d\n", @zbsize); + printa("\n %s\n%@d\n", @zplots); + trig = 0; +} diff --git a/zfsiosizebyfile.d b/zfsiosizebyfile.d new file mode 100644 index 0000000..838c4c1 --- /dev/null +++ b/zfsiosizebyfile.d @@ -0,0 +1,64 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option dynvarsize=4m +#pragma D option switchrate=10hz + +/* This script quantizes IO by path, when first argument given is +a partial path. Normally, expecting to see a partial path to data +on a ZFS dataset. Since `/volumes` is the default path under which +all datasets are mounted, we would expect to see /volumes/poolname... +*/ + +dtrace:::BEGIN +{ + /* printf("%-12s %40s\n", "OP SIZE", "PATH"); */ + printf("Tracing... Hit Ctrl-C to end.\n"); + path = $1; + read = 0; + write = 0; + +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry +/* Match first parameter on command line, +expecting partial path, like /volumes/poolname */ + +/* If we cannot extract path information, we ignore. */ +/args[0]->v_path != NULL && strstr(stringof(args[0]->v_path), path) != NULL/ +{ + read = 1; + self->path = args[0]->v_path; + self->bytes = args[1]->uio_resid; + + @rbytes["read bytes", self->path != NULL ? stringof(self->path) : ""] = quantize(self->bytes); +} + +fbt::zfs_write:entry +/* Match first parameter on command line, +expecting partial path, like /volumes/poolname */ + +/* If we cannot extract path information, we ignore. */ +/args[0]->v_path != NULL && strstr(stringof(args[0]->v_path), path) != NULL/ +{ + write = 1; + self->path = args[0]->v_path; + self->bytes = args[1]->uio_resid; + + @wbytes["write bytes", self->path != NULL ? stringof(self->path) : ""] = quantize(self->bytes); + +} + +dtrace:::END +/read > 0/ +{ + printa("%-16s Path: %s %@d\n", @rbytes); +} + +dtrace:::END +/write > 0/ +{ + printa("%-16s Path: %s %@d\n", @wbytes); +} \ No newline at end of file diff --git a/zfslatency.d b/zfslatency.d new file mode 100644 index 0000000..247df17 --- /dev/null +++ b/zfslatency.d @@ -0,0 +1,64 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, +fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->bytes = args[1]->uio_resid; + self->start = timestamp; +} + +fbt::zfs_read:return +/ this->iotime = (timestamp - self->start) / +{ + this->latency_ms = this->iotime / 1000; +/* printf("read %5d %5d\n", self->bytes, this->latency_ms); */ +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["read_b"] = avg(self->bytes); + @avg_latency["read_lat"] = avg(this->latency_ms); +} + + +fbt::zfs_write:return +/ this->iotime = (timestamp - self->start) / +{ + this->latency_ms = this->iotime / 1000; +/* printf("write %5d %5d\n", self->bytes, this->latency_ms); */ +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["write_b"] = avg(self->bytes); + @avg_latency["write_lat"] = avg(this->latency_ms); +} + +tick-1sec +/this->latency_ms > 0/ +{ + printf("%u ",walltimestamp/1000000000); + printa("%s %@d ", @avg_latency); + printa("\t%s %@d ", @bsize); + printf("%s\n", ""); + self->path = 0; self->bytes = 0; self->start = 0; + clear(@avg_latency); clear(@bsize); +} + + +/* fbt::zfs_read:return, +fbt::zfs_write:return +{ + self->path = 0; self->bytes = 0; self->start = 0; + clear(@avg_latency); clear(@bsize); +} */ \ No newline at end of file diff --git a/zfsrwtimesize.d b/zfsrwtimesize.d new file mode 100644 index 0000000..c8195d1 --- /dev/null +++ b/zfsrwtimesize.d @@ -0,0 +1,56 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet + +dtrace:::BEGIN +{ + printf("Tracing... Hit Ctrl-C to end.\n"); +} + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + start_time[arg0] = timestamp; + /* Storing buffer size to make sure that io:::done probes are triggered + only when this value is greater than 0 */ + this->path = args[0]->v_path; + this->size = args[1]->uio_resid; +} + + +fbt::zfs_read:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["average read, bytes"] = avg(this->size); + @plots["read I/O, us"] = quantize(this->delta); + @avgs["average read I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +fbt::zfs_write:return +/ this->start = start_time[arg0] / +{ + this->delta = (timestamp - this->start) / 1000; + printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(this->path), this->size, this->delta); +/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ + /* Figure out average byte size*/ + /* this->size = args[0]->b_bcount; */ + @bsize["average write, bytes"] = avg(this->size); + @plots["write I/O, us"] = quantize(this->delta); + @avgs["average write I/O, us"] = avg(this->delta); + start_time[arg0] = 0; +} + +dtrace:::END +{ + printf("\nI/O completed time and size summary:\n\n"); + printa("\t%s %@d\n", @avgs); + printa("\t%s %@d\n", @bsize); + printa("\n %s\n%@d\n", @plots); +} diff --git a/zfsslower.d b/zfsslower.d new file mode 100644 index 0000000..5aec754 --- /dev/null +++ b/zfsslower.d @@ -0,0 +1,81 @@ +#!/usr/sbin/dtrace -s +/* + * zfsslower.d show ZFS I/O taking longer than given ms. + * + * USAGE: zfsslower.d min_ms + * eg, + * zfsslower.d 100 # show I/O at least 100 ms + * + * This is from the DTrace book, chapter 5. It has been enhanced to include + * zfs_readdir() as well, which is shown in the "D" (direction) field as "D". + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + * + * TESTED: this fbt provider based script may only work on some OS versions. + * 121: ok + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + printf("%-20s %-16s %1s %4s %6s %s\n", "TIME", "PROCESS", + "D", "KB", "ms", "FILE"); + min_ns = $1 * 1000000; +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->start = timestamp; +} + +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->start = timestamp; +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +/self->start && (timestamp - self->start) >= min_ns/ +{ + this->iotime = (timestamp - self->start) / 1000000; + this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D"; + printf("%-20Y %-16s %1s %4d %6d %s\n", walltimestamp, + execname, this->dir, self->kb, this->iotime, + self->path != NULL ? stringof(self->path) : ""); +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +{ + self->path = 0; self->kb = 0; self->start = 0; +} \ No newline at end of file diff --git a/zfsslower_q.d b/zfsslower_q.d new file mode 100644 index 0000000..0840823 --- /dev/null +++ b/zfsslower_q.d @@ -0,0 +1,84 @@ +#!/usr/sbin/dtrace -s +/* + * zfsslower.d show ZFS I/O taking longer than given ms. + * + * USAGE: zfsslower.d min_ms + * eg, + * zfsslower.d 100 # show I/O at least 100 ms + * + * This is from the DTrace book, chapter 5. It has been enhanced to include + * zfs_readdir() as well, which is shown in the "D" (direction) field as "D". + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 2012 Joyent Inc., All rights reserved. + * Copyright (c) 2012 Brendan Gregg, All rights reserved. + * + * TESTED: this fbt provider based script may only work on some OS versions. + * 121: ok + */ + +#pragma D option quiet +#pragma D option defaultargs +#pragma D option switchrate=10hz + +dtrace:::BEGIN +{ + printf("%-20s %-16s %1s %4s %6s %s\n", "TIME", "PROCESS", + "D", "KB", "ms", "FILE"); + min_ns = $1 * 1000000; +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->start = timestamp; +} + +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->start = timestamp; +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +/self->start && (timestamp - self->start) >= min_ns/ +{ + this->iotime = (timestamp - self->start) / 1000; +/* this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D"; + printf("%-20Y %-16s %1s %4d %6d %s\n", walltimestamp, + execname, this->dir, self->kb, this->iotime, + self->path != NULL ? stringof(self->path) : ""); */ + + @zfsIOlat[ this->dir = probefunc == "zfs_read" ? "R" : + probefunc == "zfs_write" ? "W" : "D" ] = quantize(this->iotime); +} + +fbt::zfs_read:return, fbt::zfs_write:return, fbt::zfs_readdir:return +{ + self->path = 0; self->kb = 0; self->start = 0; +} \ No newline at end of file diff --git a/zfssnoop.d b/zfssnoop.d new file mode 100644 index 0000000..ab30447 --- /dev/null +++ b/zfssnoop.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -s + +#pragma D option quiet +#pragma D option switchrate=10hz + +/* +Collect information about file calls against ZFS +filesystems and at termination aggregate counts +for each call +*/ + +dtrace:::BEGIN +{ + printf("%-21s %6s %6s %-12.12s %-12s %-4s %s\n", "TIMESTAMP", "UID", + "PID", "PROCESS", "CALL", "KB", "PATH"); +} + +/* see uts/common/fs/zfs/zfs_vnops.c */ + +fbt::zfs_read:entry, fbt::zfs_write:entry +{ + self->path = args[0]->v_path; + self->kb = args[1]->uio_resid / 1024; + self->time = walltimestamp; +} + +fbt::zfs_open:entry +{ + self->path = (*args[0])->v_path; + self->kb = 0; + self->time = walltimestamp; +} + +fbt::zfs_close:entry, fbt::zfs_ioctl:entry, fbt::zfs_getattr:entry, +fbt::zfs_readdir:entry +{ + self->path = args[0]->v_path; + self->kb = 0; + self->time = walltimestamp; +} + +fbt::zfs_read:entry, fbt::zfs_write:entry, fbt::zfs_open:entry, +fbt::zfs_close:entry, fbt::zfs_ioctl:entry, fbt::zfs_getattr:entry, +fbt::zfs_readdir:entry +{ + @[probefunc,stringof(self->path)] = count(); + printf("%-21Y %6d %6d %-12.12s %-12s %-4d %s\n", self->time, + uid, pid, execname, probefunc, self->kb, + self->path != NULL ? stringof(self->path) : ""); + self->path = 0; self->kb = 0; +} + +dtrace:::END +{ + printf("%s\n", " ... "); + printf("%-12s %-60s %-8s\n", "CALL", "PATH", "COUNT"); + printa("%-12s %-60s %@-8d\n", @); + exit(0); +} diff --git a/ziolatency.d b/ziolatency.d index 6fecb14..4b0b9f7 100644 --- a/ziolatency.d +++ b/ziolatency.d @@ -1,11 +1,7 @@ #!/usr/sbin/dtrace -s #pragma D option quiet -#pragma D option dynvarsize=40m - -/* Description: Trace I/O latency on a per-vdev basis. Script will not be maintained for the foreseeable future. */ -/* Author: Kirill.Davydychev@Nexenta.com */ -/* Copyright 2012, Nexenta Systems, Inc. All rights reserved. */ +#pragma D option dynvarsize=4m dtrace:::BEGIN { @@ -32,40 +28,23 @@ fbt::zio_vdev_io_start:entry } fbt::zio_vdev_io_done:entry -/args[0]->io_type != 0 && start_time[arg0] > 0/ /* && args[0]->io_vd && args[0]->io_vd->vdev_path */ +/args[0]->io_type != 0 && start_time[arg0] > 0 && args[0]->io_vd && args[0]->io_vd->vdev_path/ { this->iotime = (timestamp - start_time[arg0])/1000; + this->path = stringof(args[0]->io_vd->vdev_path); this->zpool = stringof(args[0]->io_spa->spa_name); this->iotype = ziotype[args[0]->io_type]; this->childtype = ziochild[args[0]->io_child_type]; - this->path = args[0]->io_vd ? - args[0]->io_vd->vdev_path ? - stringof(args[0]->io_vd->vdev_path) : - "top_level_vdev" : - "pool"; - - this->vdev_id = args[0]->io_vd ? - args[0]->io_vd->vdev_id : - 404; /* Not Found (pool) */ - this->vdev_pa = args[0]->io_vd ? - args[0]->io_vd->vdev_parent ? - args[0]->io_vd->vdev_parent->vdev_id : - 12455 : /* L2ARC has no parent - set to 12455 (L2ArcSSd) */ - 404; /* Not found */ + this->vdev_id = args[0]->io_vd->vdev_id; /* XXX - Describe abnormal behaviors to watch out for */ - @latency[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = quantize(this->iotime); - @avg_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = avg(this->iotime); - @sum_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = sum(this->iotime); + @latency[this->zpool, this->vdev_id, this->iotype, this->childtype, this->path] = quantize(this->iotime); + @avg_lat[this->zpool, this->vdev_id, this->iotype, this->childtype, this->path] = avg(this->iotime); } dtrace:::END { - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t Latency distribution:%@d\n",@latency); - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t AvgLatency(us): %@d\n",@avg_lat); - printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t TotLatency(us): %@d\n",@sum_lat); - trunc(@latency); - trunc(@avg_lat); - trunc(@sum_lat); + printa("ZPool: %s\tChildVdevID: %d\tIOType: %s\tIOChild: %s\tDisk: %s\t Latency distribution(us):%@d\n",@latency); + printa("ZPool: %s\tChildVdevID: %d\tIOType: %s\tIOChild: %s\tDisk: %s\t AvgLatency(us): %@d\n",@avg_lat); } From bba0392f941fb4019e45c148009219b58f813085 Mon Sep 17 00:00:00 2001 From: Sam Zaydel Date: Wed, 9 Jan 2013 08:50:14 -0800 Subject: [PATCH 2/4] Added two new scripts scsilatencydist.d and vnodeops.d, both need some work. --- vnodeops.d | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 vnodeops.d diff --git a/vnodeops.d b/vnodeops.d new file mode 100644 index 0000000..025416b --- /dev/null +++ b/vnodeops.d @@ -0,0 +1,75 @@ +#!/usr/sbin/dtrace -s +/* + * vnodeops.d show latency in microseconds at the VFS layer for the various + * vnode types when reads, writes or fsyncs occur. + * + * This is based on scsilatency.d from the DTrace book, chapter 4. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * Copyright (c) 2013 RackTop Systems, All rights reserved. + * Copyright (c) 2013 Sam Zaydel, All rights reserved. + * + * CDDL HEADER END + * +*/ + #pragma D option quiet + +string vnode_type[uchar_t]; + +dtrace:::BEGIN +{ + /* See /usr/src/uts/common/sys/vnode.h for the full list. */ + vnode_type[0] = "no_type"; + vnode_type[1] = "regular"; + vnode_type[2] = "directory"; + vnode_type[3] = "block_device"; + vnode_type[4] = "character_device"; + vnode_type[5] = "link"; + vnode_type[6] = "fifo"; + vnode_type[7] = "door"; + vnode_type[8] = "procfs"; + vnode_type[9] = "sockfs"; + vnode_type[10] = "event_port"; + vnode_type[11] = "bad_vnode"; + + printf("Tracing... Hit Ctrl-C to end.\n\n"); +} + +fbt:genunix:fop_fsync:entry, +fbt:genunix:fop_read:entry, +fbt:genunix:fop_write:entry { + + self->start = timestamp; + this->type = args[0]->v_type; + self->vnodetype = vnode_type[this->type] != NULL ? vnode_type[this->type] : "NULL"; +} + +fbt:genunix:fop_fsync:return, +fbt:genunix:fop_read:return, +fbt:genunix:fop_write:return +/self->start/ +{ + this->delta = timestamp - self->start; + @latbytype["Time(us):", self->vnodetype, probefunc] = quantize(this->delta / 1000); + self->start = 0; + this->delta = 0; + self->vnodetype = "x"; +} From e97b7833dac8e04e2bbab27e1ca1d6fb40dad6b6 Mon Sep 17 00:00:00 2001 From: Sam Zaydel Date: Sun, 21 Apr 2013 07:34:10 -0700 Subject: [PATCH 3/4] Added a few scripts some time ago and never updated remote. --- filemon_zfs.d | 15 ++++++------ zfslatency.d | 63 +++++++++++++++++++++++-------------------------- zfsrwtimesize.d | 54 +++++++++++++++++++++++++----------------- 3 files changed, 69 insertions(+), 63 deletions(-) diff --git a/filemon_zfs.d b/filemon_zfs.d index b8c1831..3ec3794 100644 --- a/filemon_zfs.d +++ b/filemon_zfs.d @@ -46,7 +46,7 @@ BEGIN { - printf ("%20s%20s%8s%10s%10s\n", "DATE", "CMD", "R/W/D", "USER", "PID"); + printf ("%20s%20s%8s%20s%10s%10s\n", "DATE", "CMD", "R/W/D", "PATH", "USER", "PID"); } zfs_read:entry, @@ -65,8 +65,9 @@ zfs_write:return, zfs_putpage:return / strstr(stringof(self->filepath), $1) != NULL / { - printf("%20Y%20s%8s%10d%10d\n", - walltimestamp, execname, "W", uid, pid); + + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "W", stringof(self->filepath), uid, pid); self->filepath = 0; } @@ -74,8 +75,8 @@ zfs_read:return, zfs_getpage:return / strstr(stringof(self->filepath), $1) != NULL / { - printf("%20Y%20s%8s%10d%10d\n", - walltimestamp, execname, "R", uid, pid); + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "R", stringof(self->filepath), uid, pid); self->filepath = 0; } @@ -88,6 +89,6 @@ zfs_remove:entry zfs_remove:return / strstr(stringof(self->filepath), $1) != NULL / { - printf("%20Y%20s%8s%10d%10d\n", - walltimestamp, execname, "D", uid, pid); + printf("%20Y%20s%8s% 20s%10d%10d\n", + walltimestamp, execname, "D", stringof(self->filepath), uid, pid); } diff --git a/zfslatency.d b/zfslatency.d index 247df17..b66b669 100644 --- a/zfslatency.d +++ b/zfslatency.d @@ -6,59 +6,54 @@ dtrace:::BEGIN { - + rx = 0; wx = 0; } /* see uts/common/fs/zfs/zfs_vnops.c */ fbt::zfs_read:entry, -fbt::zfs_write:entry +fbt::zfs_write:entry, +fbt::zfs_fsync:entry { self->path = args[0]->v_path; - self->bytes = args[1]->uio_resid; + self->bytes = ((uio_t *)arg1)->uio_resid; self->start = timestamp; + self->id = self->start+self->bytes; } fbt::zfs_read:return -/ this->iotime = (timestamp - self->start) / +/ self->bytes != NULL && self->start+self->bytes == self->id / + { - this->latency_ms = this->iotime / 1000; -/* printf("read %5d %5d\n", self->bytes, this->latency_ms); */ -/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ - /* Figure out average byte size*/ - /* this->size = args[0]->b_bcount; */ - @bsize["read_b"] = avg(self->bytes); - @avg_latency["read_lat"] = avg(this->latency_ms); + this->latency = (timestamp - self->start) / 1000; + this->ops = "read"; + @bytes["bytes"] = sum(self->bytes); + @latency["Dist (bytes/us):", this->ops] = quantize(self->bytes/this->latency); } - fbt::zfs_write:return -/ this->iotime = (timestamp - self->start) / +/ self->bytes != NULL && self->start+self->bytes == self->id / + { - this->latency_ms = this->iotime / 1000; -/* printf("write %5d %5d\n", self->bytes, this->latency_ms); */ -/* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ - /* Figure out average byte size*/ - /* this->size = args[0]->b_bcount; */ - @bsize["write_b"] = avg(self->bytes); - @avg_latency["write_lat"] = avg(this->latency_ms); + this->latency = (timestamp - self->start) / 1000; + this->ops = "write"; + @latency["Dist (bytes/us):", this->ops] = quantize(self->bytes/this->latency); } -tick-1sec -/this->latency_ms > 0/ +fbt::zfs_read:return, +fbt::zfs_write:return +/ self->bytes != NULL && self->start+self->bytes == self->id / + { - printf("%u ",walltimestamp/1000000000); - printa("%s %@d ", @avg_latency); - printa("\t%s %@d ", @bsize); - printf("%s\n", ""); - self->path = 0; self->bytes = 0; self->start = 0; - clear(@avg_latency); clear(@bsize); + self->start = 0; + self->bytes = 0; + self->id = 0; } - -/* fbt::zfs_read:return, -fbt::zfs_write:return +tick-30sec { - self->path = 0; self->bytes = 0; self->start = 0; - clear(@avg_latency); clear(@bsize); -} */ \ No newline at end of file + printa("%18s %s %@d\n", @latency); + printa(@bytes); + trunc(@latency); + trunc(@bytes); +} \ No newline at end of file diff --git a/zfsrwtimesize.d b/zfsrwtimesize.d index c8195d1..e868aed 100644 --- a/zfsrwtimesize.d +++ b/zfsrwtimesize.d @@ -9,42 +9,49 @@ dtrace:::BEGIN fbt::zfs_read:entry, fbt::zfs_write:entry { - start_time[arg0] = timestamp; + self->ts = timestamp; /* Storing buffer size to make sure that io:::done probes are triggered only when this value is greater than 0 */ - this->path = args[0]->v_path; - this->size = args[1]->uio_resid; + self->path = args[0]->v_path; + self->size = args[1]->uio_resid; } - -fbt::zfs_read:return -/ this->start = start_time[arg0] / +fbt::zfs_write:return +/ self->ts / { - this->delta = (timestamp - this->start) / 1000; - printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", - stringof(this->path), this->size, this->delta); + this->type = probefunc == "zfs_write" ? "Write" : "Read"; + this->delta = (timestamp - self->ts); + /* printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(self->path), self->size, this->delta); */ + this->p = self->path == NULL ? "Unknown" : self->path; + @writes[stringof(this->p)] = sum(self->size/(1024)); /* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ /* Figure out average byte size*/ /* this->size = args[0]->b_bcount; */ - @bsize["average read, bytes"] = avg(this->size); - @plots["read I/O, us"] = quantize(this->delta); - @avgs["average read I/O, us"] = avg(this->delta); - start_time[arg0] = 0; + @bsize["average write, bytes"] = avg(self->size); + @plots["write I/O, us"] = quantize(this->delta/1000); + @avgs["average write I/O, us"] = avg(this->delta/1000); + this->delta = 0; + self->ts = 0; } -fbt::zfs_write:return -/ this->start = start_time[arg0] / +fbt::zfs_read:return +/ self->ts / { - this->delta = (timestamp - this->start) / 1000; - printf("[WRITE] Device: %5s Size (bytes): %5d Time (us): %5u \n", - stringof(this->path), this->size, this->delta); + this->type = probefunc == "zfs_write" ? "Write" : "Read"; + this->delta = (timestamp - self->ts); + /* printf("[READ] Device: %5s Size (bytes): %5d Time (us): %5u \n", + stringof(self->path), self->size, this->delta); */ + this->p = self->path == NULL ? "Unknown" : self->path; + @reads[stringof(this->p)] = sum(self->size/(1024)); /* @plots[args[1]->dev_statname, args[0]->b_bcount] = quantize(this->delta); */ /* Figure out average byte size*/ /* this->size = args[0]->b_bcount; */ - @bsize["average write, bytes"] = avg(this->size); - @plots["write I/O, us"] = quantize(this->delta); - @avgs["average write I/O, us"] = avg(this->delta); - start_time[arg0] = 0; + @bsize["average read, bytes"] = avg(self->size); + @plots["read I/O, us"] = quantize(this->delta/1000); + @avgs["average read I/O, us"] = avg(this->delta/1000); + this->delta = 0; + self->ts = 0; } dtrace:::END @@ -53,4 +60,7 @@ dtrace:::END printa("\t%s %@d\n", @avgs); printa("\t%s %@d\n", @bsize); printa("\n %s\n%@d\n", @plots); + trunc(@reads, 10); trunc(@writes, 10); + printa("[READ] Device: %5s KBytes: %@d\n", @reads); + printa("[WRITE] Device: %5s KBytes: %@d\n", @writes); } From 4d33686d9c01003c4554a9a98ec0e69b2b85c6d7 Mon Sep 17 00:00:00 2001 From: Sam Zaydel Date: Mon, 22 Apr 2013 06:47:36 -0700 Subject: [PATCH 4/4] Fixed script to work correctly on SmartOS. --- arc_adjust.v2.d | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/arc_adjust.v2.d b/arc_adjust.v2.d index 78561ab..005a599 100644 --- a/arc_adjust.v2.d +++ b/arc_adjust.v2.d @@ -2,32 +2,34 @@ #pragma D option quiet arc_adjust:entry +/* Adjusted slightly to work on smartOS. Had to replace global `arc_meta_used var with +value from kstat data: `arc_stats.arcstat_meta_used.value.ui64. */ { - self->ts = walltimestamp; - printf("%Y 1836: adjustment = MIN( %d, %d)\n", walltimestamp, - (int64_t)(`arc_stats.arcstat_size.value.ui64 - `arc_stats.arcstat_c.value.ui64), - (int64_t)(`arc_anon->arcs_size + `arc_mru->arcs_size + `arc_meta_used - `arc_stats.arcstat_p.value.ui64)); - printf("arc_size = %d, arc_c = %d, arc_anon.size = %d, arc_mru.size = %d, arc_meta_used = %d, arc_p = %d\n", - `arc_stats.arcstat_size.value.ui64, - `arc_stats.arcstat_c.value.ui64, - `arc_anon->arcs_size, - `arc_mru->arcs_size, - `arc_meta_used, - `arc_stats.arcstat_p.value.ui64); - printf("arc_mfu.size = %d, arc_mfu_ghost.size = %d, arc_mru_ghost.size = %d, arc_l2c_only.size = %d\n", - `arc_mfu->arcs_size, - `arc_mfu_ghost->arcs_size, - `arc_mru_ghost->arcs_size, - `arc_l2c_only->arcs_size); + self->ts = walltimestamp; + printf("%Y 1836: adjustment = MIN( %d, %d)\n", walltimestamp, + (int64_t)(`arc_stats.arcstat_size.value.ui64 - `arc_stats.arcstat_c.value.ui64), + (int64_t)(`arc_anon->arcs_size + `arc_mru->arcs_size + `arc_stats.arcstat_meta_used.value.ui64 - `arc_stats.arcstat_p.value.ui64)); + printf("arc_size = %d, arc_c = %d, arc_anon.size = %d, arc_mru.size = %d, arc_meta_used = %d, arc_p = %d\n", + `arc_stats.arcstat_size.value.ui64, + `arc_stats.arcstat_c.value.ui64, + `arc_anon->arcs_size, + `arc_mru->arcs_size, + `arc_stats.arcstat_meta_used.value.ui64,, + `arc_stats.arcstat_p.value.ui64); + printf("arc_mfu.size = %d, arc_mfu_ghost.size = %d, arc_mru_ghost.size = %d, arc_l2c_only.size = %d\n", + `arc_mfu->arcs_size, + `arc_mfu_ghost->arcs_size, + `arc_mru_ghost->arcs_size, + `arc_l2c_only->arcs_size); } arc_shrink:entry { - printf("%Y 2085: to_free = MAX( %d, %d)\n", walltimestamp, - `arc_stats.arcstat_c.value.ui64 >> `arc_shrink_shift, `needfree*4096); + printf("%Y 2085: to_free = MAX( %d, %d)\n", walltimestamp, + `arc_stats.arcstat_c.value.ui64 >> `arc_shrink_shift, `needfree*4096); } arc_adjust:return { - printf("Returned from arc_adjust started at %Y %d ms later.\n", self->ts, (walltimestamp - self->ts)/1000000); + printf("Returned from arc_adjust started at %Y %d ms later.\n", self->ts, (walltimestamp - self->ts)/1000000); }