diff --git a/api/except.txt b/api/except.txt index 2a1287ce90..6700cab773 100644 --- a/api/except.txt +++ b/api/except.txt @@ -459,3 +459,4 @@ pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8 pkg text/scanner, const GoTokens = 1012 pkg unicode, const Version = "10.0.0" +pkg runtime/pprof, func StartCPUProfile(io.Writer) error diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go index 26fb337f86..8b3afdbb29 100644 --- a/src/cmd/go/alldocs.go +++ b/src/cmd/go/alldocs.go @@ -2958,6 +2958,30 @@ // Write a CPU profile to the specified file before exiting. // Writes test binary as -c would. // +// -pmuprofile pmu.out +// Write a PMU profile to the specified file before exiting. +// Writes test binary as -c would. +// +// -pmuevent event +// Specify which pmu event to sample. +// To sample PMU cycles, use -test.pmuevent=cycles. +// To sample PMU instructions, use -test.pmuevent=instructions. +// To sample PMU cache misses, use -test.pmuevent=cacheMisses. +// To sample PMU last-level cache read accesses, use -test.pmuevent=cacheLLReadAccesses. +// To sample PMU last-level cache read misses, use -test.pmuevent=cacheLLReadMisses. +// +// -pmuperiod n +// Set the sampling period for the pmu event specified by -pmuevent. +// +// -pmupreciseip n +// Set the precise ip level for the pmu event specified by -pmuevent. +// +// -pmukernelincl +// Count the kernel or not. +// +// -pmuhvincl +// Count the hypervisor or not. +// // -memprofile mem.out // Write an allocation profile to the file after all tests have passed. // Writes test binary as -c would. diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go index 8440a83951..0be5a351a6 100644 --- a/src/cmd/go/internal/test/test.go +++ b/src/cmd/go/internal/test/test.go @@ -313,6 +313,30 @@ profile the tests during execution: Write a CPU profile to the specified file before exiting. 
Writes test binary as -c would. + -pmuprofile pmu.out + Write a PMU profile to the specified file before exiting. + Writes test binary as -c would. + + -pmuevent event + Specify which pmu event to sample. + To sample PMU cycles, use -test.pmuevent=cycles. + To sample PMU instructions, use -test.pmuevent=instructions. + To sample PMU cache misses, use -test.pmuevent=cacheMisses. + To sample PMU last-level cache read accesses, use -test.pmuevent=cacheLLReadAccesses. + To sample PMU last-level cache read misses, use -test.pmuevent=cacheLLReadMisses. + + -pmuperiod n + Set the sampling period for the pmu event specified by -pmuevent. + + -pmupreciseip n + Set the precise ip level for the pmu event specified by -pmuevent. + + -pmukernelincl + Count the kernel or not. + + -pmuhvincl + Count the hypervisor or not. + -memprofile mem.out Write an allocation profile to the file after all tests have passed. Writes test binary as -c would. diff --git a/src/cmd/go/internal/test/testflag.go b/src/cmd/go/internal/test/testflag.go index 138e1f9d2a..02075026f8 100644 --- a/src/cmd/go/internal/test/testflag.go +++ b/src/cmd/go/internal/test/testflag.go @@ -46,6 +46,12 @@ var testFlagDefn = []*cmdflag.Defn{ {Name: "coverprofile", PassToTest: true}, {Name: "cpu", PassToTest: true}, {Name: "cpuprofile", PassToTest: true}, + {Name: "pmuprofile", PassToTest: true}, + {Name: "pmuevent", PassToTest: true}, + {Name: "pmuperiod", PassToTest: true}, + {Name: "pmupreciseip", PassToTest: true}, + {Name: "pmukernelincl", BoolVar: new(bool), PassToTest: true}, + {Name: "pmuhvincl", BoolVar: new(bool), PassToTest: true}, {Name: "failfast", BoolVar: new(bool), PassToTest: true}, {Name: "list", PassToTest: true}, {Name: "memprofile", PassToTest: true}, @@ -157,7 +163,7 @@ func testFlags(usage func(), args []string) (packageNames, passToTest []string) testList = true case "timeout": testTimeout = value - case "blockprofile", "cpuprofile", "memprofile", "mutexprofile": + case "blockprofile", 
"cpuprofile", "pmuprofile", "memprofile", "mutexprofile": testProfile = "-" + f.Name testNeedBinary = true case "trace": diff --git a/src/net/http/pprof/pprof.go b/src/net/http/pprof/pprof.go index 35b3285a08..db2f6fa690 100644 --- a/src/net/http/pprof/pprof.go +++ b/src/net/http/pprof/pprof.go @@ -110,6 +110,52 @@ func serveError(w http.ResponseWriter, status int, txt string) { fmt.Fprintln(w, txt) } +func pmuProfile(w http.ResponseWriter, r *http.Request) error { + var eventConfig pprof.PMUEventConfig + eventConfig.Period = 1e7 // default value + + if period, err := strconv.ParseInt(r.FormValue("pmuperiod"), 10, 64); err == nil { + eventConfig.Period = period + } + if preciseIP, err := strconv.ParseInt(r.FormValue("pmupreciseip"), 10, 8); err == nil { + eventConfig.PreciseIP = int8(preciseIP) + } + if isKernelIncluded, err := strconv.ParseBool(r.FormValue("pmukernelincl")); err == nil { + eventConfig.IsKernelIncluded = isKernelIncluded + } + if isHvIncluded, err := strconv.ParseBool(r.FormValue("pmuhvincl")); err == nil { + eventConfig.IsHvIncluded = isHvIncluded + } + + switch eventName := r.FormValue("pmuevent"); eventName { + case "": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(w, &eventConfig)) + case "cycles": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(w, &eventConfig)) + case "instructions": + return pprof.StartPMUProfile(pprof.WithProfilingPMUInstructions(w, &eventConfig)) + case "cacheReferences": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheReferences(w, &eventConfig)) + case "cacheMisses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheMisses(w, &eventConfig)) + case "cacheLLReadAccesses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheLLReadAccesses(w, &eventConfig)) + case "cacheLLReadMisses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheLLReadMisses(w, &eventConfig)) + default: + // Is this a raw event? 
+ if strings.HasPrefix(eventName, "r") { + if rawHexEvent, err := strconv.ParseInt(eventName[1:], 16, 64); err == nil { + eventConfig.RawEvent = rawHexEvent + return pprof.StartPMUProfile(pprof.WithProfilingPMURaw(w, &eventConfig)) + } + return fmt.Errorf("Incorrect hex format for raw event") + } else { + return fmt.Errorf("Unknown or not yet implemented event") + } + } +} + // Profile responds with the pprof-formatted cpu profile. // Profiling lasts for duration specified in seconds GET parameter, or for 30 seconds if not specified. // The package initialization registers it as /debug/pprof/profile. @@ -129,14 +175,31 @@ func Profile(w http.ResponseWriter, r *http.Request) { // because if it does it starts writing. w.Header().Set("Content-Type", "application/octet-stream") w.Header().Set("Content-Disposition", `attachment; filename="profile"`) - if err := pprof.StartCPUProfile(w); err != nil { - // StartCPUProfile failed, so no writes yet. - serveError(w, http.StatusInternalServerError, - fmt.Sprintf("Could not enable CPU profiling: %s", err)) - return + + isPMUEnabled, err := strconv.ParseBool(r.FormValue("pmu")) + + if (err != nil) || (!isPMUEnabled) { + if err = pprof.StartCPUProfile(w); err != nil { + serveError(w, http.StatusInternalServerError, + fmt.Sprintf("Could not enable CPU profiling: %s", err)) + return + } + } else { + // err == nil and isPMUEnabled == true + if err = pmuProfile(w, r); err != nil { + serveError(w, http.StatusInternalServerError, + fmt.Sprintf("Could not enable PMU profiling: %s", err)) + return + } } + sleep(w, time.Duration(sec)*time.Second) - pprof.StopCPUProfile() + + if isPMUEnabled { + pprof.StopPMUProfile() + } else { + pprof.StopCPUProfile() + } } // Trace responds with the execution trace in binary form. 
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index b98843e73e..d94cdf1fb4 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -1566,6 +1566,18 @@ TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12 MOVL CX, y+8(FP) JMP runtime·goPanicExtendSlice3CU(SB) +TEXT runtime·mb(SB),NOSPLIT,$0-0 + MFENCE + RET + +TEXT runtime·rmb(SB),NOSPLIT,$0-0 + LFENCE + RET + +TEXT runtime·wmb(SB),NOSPLIT,$0-0 + SFENCE + RET + #ifdef GOOS_android // Use the free TLS_SLOT_APP slot #2 on Android Q. // Earlier androids are set up in gcc_android.c. diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index fd3a9c3127..160223d6e2 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1721,6 +1721,18 @@ TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3CU(SB) +TEXT runtime·mb(SB),NOSPLIT,$0-0 + MFENCE + RET + +TEXT runtime·rmb(SB),NOSPLIT,$0-0 + LFENCE + RET + +TEXT runtime·wmb(SB),NOSPLIT,$0-0 + SFENCE + RET + #ifdef GOOS_android // Use the free TLS_SLOT_APP slot #2 on Android Q. // Earlier androids are set up in gcc_android.c. diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go index 81038f5c48..8b7162356b 100644 --- a/src/runtime/cpuprof.go +++ b/src/runtime/cpuprof.go @@ -20,7 +20,7 @@ import ( const maxCPUProfStack = 64 -type cpuProfile struct { +type profile struct { lock mutex on bool // profiling is on log *profBuf // profile events written here @@ -41,7 +41,8 @@ type cpuProfile struct { lostExtra uint64 // count of frames lost because extra is full } -var cpuprof cpuProfile +var cpuprof profile +var pmuprof [GO_COUNT_PMU_EVENTS_MAX]profile // SetCPUProfileRate sets the CPU profiling rate to hz samples per second. // If hz <= 0, SetCPUProfileRate turns off profiling. 
@@ -60,15 +61,16 @@ func SetCPUProfileRate(hz int) { } lock(&cpuprof.lock) + defer unlock(&cpuprof.lock) if hz > 0 { if cpuprof.on || cpuprof.log != nil { print("runtime: cannot set cpu profile rate until previous profile has finished.\n") - unlock(&cpuprof.lock) return } cpuprof.on = true - cpuprof.log = newProfBuf(1, 1<<17, 1<<14) + // Enlarging the buffer words and tags reduces the number of samples lost at the cost of larger amounts of memory + cpuprof.log = newProfBuf( /* header size */ 1 /* buffer words */, 1<<17 /* tags */, 1<<14) hdr := [1]uint64{uint64(hz)} cpuprof.log.write(nil, nanotime(), hdr[:], nil) setcpuprofilerate(int32(hz)) @@ -78,7 +80,42 @@ func SetCPUProfileRate(hz int) { cpuprof.addExtra() cpuprof.log.close() } - unlock(&cpuprof.lock) +} + +func SetPMUProfile(eventId int, eventAttr *PMUEventAttr) { + lock(&pmuprof[eventId].lock) + defer unlock(&pmuprof[eventId].lock) + if eventAttr != nil { + if pmuprof[eventId].on || pmuprof[eventId].log != nil { + print("runtime: cannot set pmu profile rate until previous profile has finished.\n") + return + } + + pmuprof[eventId].on = true + // Enlarging the buffer words and tags reduces the number of samples lost at the cost of larger amounts of memory + pmuprof[eventId].log = newProfBuf( /* header size */ 1 /* buffer words */, 1<<17 /* tags */, 1<<14) + hdr := [1]uint64{eventAttr.Period} + pmuprof[eventId].log.write(nil, nanotime(), hdr[:], nil) + setpmuprofile(int32(eventId), eventAttr) + } else if pmuprof[eventId].on { + setpmuprofile(int32(eventId), nil) + pmuprof[eventId].on = false + pmuprof[eventId].addExtra(eventId) + pmuprof[eventId].log.close() + } +} + +//go:nowritebarrierrec +func (p *profile) addImpl(gp *g, stk []uintptr, cpuorpmuprof *profile) { + if p.numExtra > 0 || p.lostExtra > 0 { + p.addExtra() + } + hdr := [1]uint64{1} + // Note: write "knows" that the argument is &gp.labels, + // because otherwise its write barrier behavior may not + // be correct. 
See the long comment there before + // changing the argument here. + cpuorpmuprof.log.write(&gp.labels, nanotime(), hdr[:], stk) } // add adds the stack trace to the profile. @@ -87,25 +124,38 @@ func SetCPUProfileRate(hz int) { // held at the time of the signal, nor can it use substantial amounts // of stack. //go:nowritebarrierrec -func (p *cpuProfile) add(gp *g, stk []uintptr) { - // Simple cas-lock to coordinate with setcpuprofilerate. - for !atomic.Cas(&prof.signalLock, 0, 1) { - osyield() - } - - if prof.hz != 0 { // implies cpuprof.log != nil - if p.numExtra > 0 || p.lostExtra > 0 { - p.addExtra() +func (p *profile) add(gp *g, stk []uintptr, eventIds ...int) { + if len(eventIds) == 0 { + for !atomic.Cas(&prof.signalLock, 0, 1) { + osyield() + } + if prof.hz != 0 { // implies cpuprof.log != nil + p.addImpl(gp, stk, &cpuprof) + } + atomic.Store(&prof.signalLock, 0) + } else { + eventId := eventIds[0] + for !atomic.Cas(&pmuEvent[eventId].signalLock, 0, 1) { + osyield() + } + if pmuEvent[eventId].eventAttr != nil { // implies pmuprof[eventId].log != nil + p.addImpl(gp, stk, &pmuprof[eventId]) } - hdr := [1]uint64{1} - // Note: write "knows" that the argument is &gp.labels, - // because otherwise its write barrier behavior may not - // be correct. See the long comment there before - // changing the argument here. - cpuprof.log.write(&gp.labels, nanotime(), hdr[:], stk) + atomic.Store(&pmuEvent[eventId].signalLock, 0) } +} - atomic.Store(&prof.signalLock, 0) +//go:nosplit +//go:nowritebarrierrec +func (p *profile) addNonGoImpl(stk []uintptr, prof *profile) { + if prof.numExtra+1+len(stk) < len(prof.extra) { + i := prof.numExtra + prof.extra[i] = uintptr(1 + len(stk)) + copy(prof.extra[i+1:], stk) + prof.numExtra += 1 + len(stk) + } else { + prof.lostExtra++ + } } // addNonGo adds the non-Go stack trace to the profile. @@ -117,25 +167,26 @@ func (p *cpuProfile) add(gp *g, stk []uintptr) { // gets the signal handling event. 
//go:nosplit //go:nowritebarrierrec -func (p *cpuProfile) addNonGo(stk []uintptr) { - // Simple cas-lock to coordinate with SetCPUProfileRate. - // (Other calls to add or addNonGo should be blocked out - // by the fact that only one SIGPROF can be handled by the - // process at a time. If not, this lock will serialize those too.) - for !atomic.Cas(&prof.signalLock, 0, 1) { - osyield() - } - - if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) { - i := cpuprof.numExtra - cpuprof.extra[i] = uintptr(1 + len(stk)) - copy(cpuprof.extra[i+1:], stk) - cpuprof.numExtra += 1 + len(stk) +func (p *profile) addNonGo(stk []uintptr, eventIds ...int) { + if len(eventIds) == 0 { + // Simple cas-lock to coordinate with SetCPUProfileRate. + // (Other calls to add or addNonGo should be blocked out + // by the fact that only one SIGPROF can be handled by the + // process at a time. If not, this lock will serialize those too.) + for !atomic.Cas(&prof.signalLock, 0, 1) { + osyield() + } + p.addNonGoImpl(stk, &cpuprof) + atomic.Store(&prof.signalLock, 0) } else { - cpuprof.lostExtra++ + eventId := eventIds[0] + // Only one SIGPROF for each PMU event can be handled by the process at a time. + for !atomic.Cas(&pmuEvent[eventId].signalLock, 0, 1) { + osyield() + } + p.addNonGoImpl(stk, &pmuprof[eventId]) + atomic.Store(&pmuEvent[eventId].signalLock, 0) } - - atomic.Store(&prof.signalLock, 0) } // addExtra adds the "extra" profiling events, @@ -143,7 +194,7 @@ func (p *cpuProfile) addNonGo(stk []uintptr) { // addExtra is called either from a signal handler on a Go thread // or from an ordinary goroutine; either way it can use stack // and has a g. The world may be stopped, though. -func (p *cpuProfile) addExtra() { +func (p *profile) addExtra(eventIds ...int) { // Copy accumulated non-Go profile events. 
hdr := [1]uint64{1} for i := 0; i < p.numExtra; { @@ -159,18 +210,28 @@ func (p *cpuProfile) addExtra() { funcPC(_LostExternalCode) + sys.PCQuantum, funcPC(_ExternalCode) + sys.PCQuantum, } - cpuprof.log.write(nil, 0, hdr[:], lostStk[:]) + if len(eventIds) == 0 { + cpuprof.log.write(nil, 0, hdr[:], lostStk[:]) + } else { + eventId := eventIds[0] + pmuprof[eventId].log.write(nil, 0, hdr[:], lostStk[:]) + } p.lostExtra = 0 } } -func (p *cpuProfile) addLostAtomic64(count uint64) { +func (p *profile) addLostAtomic64(count uint64, eventIds ...int) { hdr := [1]uint64{count} lostStk := [2]uintptr{ funcPC(_LostSIGPROFDuringAtomic64) + sys.PCQuantum, funcPC(_System) + sys.PCQuantum, } - cpuprof.log.write(nil, 0, hdr[:], lostStk[:]) + if len(eventIds) == 0 { + cpuprof.log.write(nil, 0, hdr[:], lostStk[:]) + } else { + eventId := eventIds[0] + pmuprof[eventId].log.write(nil, 0, hdr[:], lostStk[:]) + } } // CPUProfile panics. @@ -191,6 +252,19 @@ func runtime_pprof_runtime_cyclesPerSecond() int64 { return tickspersecond() } +func readProfileImpl(prof *profile) ([]uint64, []unsafe.Pointer, bool) { + lock(&prof.lock) + log := prof.log + unlock(&prof.lock) + data, tags, eof := log.read(profBufBlocking) + if len(data) == 0 && eof { + lock(&prof.lock) + prof.log = nil + unlock(&prof.lock) + } + return data, tags, eof +} + // readProfile, provided to runtime/pprof, returns the next chunk of // binary CPU profiling stack trace data, blocking until data is available. // If profiling is turned off and all the profile data accumulated while it was @@ -198,15 +272,11 @@ func runtime_pprof_runtime_cyclesPerSecond() int64 { // The caller must save the returned data and tags before calling readProfile again. 
// //go:linkname runtime_pprof_readProfile runtime/pprof.readProfile -func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) { - lock(&cpuprof.lock) - log := cpuprof.log - unlock(&cpuprof.lock) - data, tags, eof := log.read(profBufBlocking) - if len(data) == 0 && eof { - lock(&cpuprof.lock) - cpuprof.log = nil - unlock(&cpuprof.lock) +func runtime_pprof_readProfile(eventIds ...int) ([]uint64, []unsafe.Pointer, bool) { + if len(eventIds) == 0 { + return readProfileImpl(&cpuprof) + } else { + eventId := eventIds[0] + return readProfileImpl(&pmuprof[eventId]) } - return data, tags, eof } diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go index e2fcbcac71..0ade3f80de 100644 --- a/src/runtime/defs_linux_386.go +++ b/src/runtime/defs_linux_386.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -97,6 +99,135 @@ const ( _SOCK_DGRAM = 0x2 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + 
_PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + 
_PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 + _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80042407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4004240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc004240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + _PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40042406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + type fpreg struct { significand [4]uint16 exponent uint16 @@ -164,6 +295,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint32 + // si_fd is the field right after the union + si_fd int32 } type stackt struct { @@ -225,3 +358,9 @@ type sockaddr_un struct { family uint16 path [108]byte } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go index ddad7fddd4..d49d9c8bdf 100644 --- a/src/runtime/defs_linux_amd64.go +++ b/src/runtime/defs_linux_amd64.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -94,6 +96,135 @@ const ( _SOCK_DGRAM = 0x2 ) +// The replication is because constants could be 
different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + 
_PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 + _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + _PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + type timespec struct { tv_sec int64 tv_nsec int64 @@ -127,6 +258,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only 
field we use si_addr uint64 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -261,3 +394,8 @@ type sockaddr_un struct { family uint16 path [108]byte } + +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_arm.go b/src/runtime/defs_linux_arm.go index 9d10d664e1..01fa2fffe0 100644 --- a/src/runtime/defs_linux_arm.go +++ b/src/runtime/defs_linux_arm.go @@ -11,49 +11,52 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 _MADV_HUGEPAGE = 0xe _MADV_NOHUGEPAGE = 0xf - _SA_RESTART = 0x10000000 - _SA_ONSTACK = 0x8000000 - _SA_RESTORER = 0 // unused on ARM - _SA_SIGINFO = 0x4 - _SIGHUP = 0x1 - _SIGINT = 0x2 - _SIGQUIT = 0x3 - _SIGILL = 0x4 - _SIGTRAP = 0x5 - _SIGABRT = 0x6 - _SIGBUS = 0x7 - _SIGFPE = 0x8 - _SIGKILL = 0x9 - _SIGUSR1 = 0xa - _SIGSEGV = 0xb - _SIGUSR2 = 0xc - _SIGPIPE = 0xd - _SIGALRM = 0xe - _SIGSTKFLT = 0x10 - _SIGCHLD = 0x11 - _SIGCONT = 0x12 - _SIGSTOP = 0x13 - _SIGTSTP = 0x14 - _SIGTTIN = 0x15 - _SIGTTOU = 0x16 - _SIGURG = 0x17 - _SIGXCPU = 0x18 - _SIGXFSZ = 0x19 - _SIGVTALRM = 0x1a - _SIGPROF = 0x1b - _SIGWINCH = 0x1c - _SIGIO = 0x1d - _SIGPWR = 0x1e - _SIGSYS = 0x1f + _SA_RESTART = 0x10000000 + _SA_ONSTACK = 0x8000000 + _SA_RESTORER = 0 // unused on ARM + _SA_SIGINFO = 0x4 + _SIGHUP = 0x1 + _SIGINT = 0x2 + _SIGQUIT = 0x3 + _SIGILL = 0x4 + _SIGTRAP = 0x5 + _SIGABRT = 0x6 + _SIGBUS = 0x7 + _SIGFPE = 0x8 + _SIGKILL = 0x9 + _SIGUSR1 = 0xa + _SIGSEGV = 0xb + _SIGUSR2 = 0xc + _SIGPIPE = 0xd + _SIGALRM = 0xe + _SIGSTKFLT = 0x10 + _SIGCHLD = 0x11 + _SIGCONT = 0x12 + _SIGSTOP = 0x13 + _SIGTSTP = 0x14 + _SIGTTIN = 0x15 + _SIGTTOU = 0x16 + _SIGURG = 0x17 + _SIGXCPU = 0x18 + _SIGXFSZ = 0x19 + _SIGVTALRM = 0x1a + _SIGPROF = 0x1b + _SIGWINCH = 0x1c + _SIGIO = 0x1d + _SIGPWR = 0x1e + _SIGSYS = 
0x1f + _FPE_INTDIV = 0x1 _FPE_INTOVF = 0x2 _FPE_FLTDIV = 0x3 @@ -89,6 +92,135 @@ const ( _SOCK_DGRAM = 0x2 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + 
_PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 + _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80042407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4004240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc004240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + _PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40042406 + 
_PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + type timespec struct { tv_sec int32 tv_nsec int32 @@ -159,6 +291,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint32 + // si_fd is the field right after the union + si_fd int32 } type sigactiont struct { @@ -178,3 +312,9 @@ type sockaddr_un struct { family uint16 path [108]byte } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_arm64.go b/src/runtime/defs_linux_arm64.go index b325a229a1..d19bc77da8 100644 --- a/src/runtime/defs_linux_arm64.go +++ b/src/runtime/defs_linux_arm64.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -94,6 +96,135 @@ const ( _SOCK_DGRAM = 0x2 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + 
_PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd 
+ _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 + _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + _PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + type timespec struct { tv_sec int64 tv_nsec int64 @@ -127,6 +258,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint64 + // si_fd is the next field. 
+ si_fd int32 } type itimerval struct { @@ -184,3 +317,9 @@ type ucontext struct { _pad2 [8]byte // sigcontext must be aligned to 16-byte uc_mcontext sigcontext } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_mips64x.go b/src/runtime/defs_linux_mips64x.go index a52d0d40cf..e751798d57 100644 --- a/src/runtime/defs_linux_mips64x.go +++ b/src/runtime/defs_linux_mips64x.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x800 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x800 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -89,6 +91,135 @@ const ( _EPOLL_CTL_MOD = 0x3 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES 
= 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 
+ _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x20002401 + _PERF_EVENT_IOC_ENABLE = 0x20002400 + _PERF_EVENT_IOC_ID = 0x40082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x8008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x80042409 + _PERF_EVENT_IOC_PERIOD = 0x80082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x20002402 + _PERF_EVENT_IOC_RESET = 0x20002403 + _PERF_EVENT_IOC_SET_BPF = 0x80042408 + _PERF_EVENT_IOC_SET_FILTER = 0x80082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x20002405 +) + //struct Sigset { // uint64 sig[1]; //}; @@ -130,6 +261,8 @@ type siginfo struct { __pad0 [1]int32 // below here is a union; si_addr is the only field we use si_addr uint64 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -180,3 +313,9 @@ type ucontext struct { uc_mcontext sigcontext uc_sigmask uint64 } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_mipsx.go b/src/runtime/defs_linux_mipsx.go index f3a1dd0cf0..e79b110856 100644 --- a/src/runtime/defs_linux_mipsx.go +++ b/src/runtime/defs_linux_mipsx.go @@ -17,9 +17,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x800 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x800 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -93,6 +95,135 @@ const ( _EPOLL_CTL_MOD = 0x3 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 
0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + 
_PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x20002401 + _PERF_EVENT_IOC_ENABLE = 0x20002400 + _PERF_EVENT_IOC_ID = 0x40042407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x8004240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x80042409 + _PERF_EVENT_IOC_PERIOD = 0x80082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc004240a + _PERF_EVENT_IOC_REFRESH = 0x20002402 + _PERF_EVENT_IOC_RESET = 0x20002403 + _PERF_EVENT_IOC_SET_BPF = 0x80042408 + _PERF_EVENT_IOC_SET_FILTER = 0x80042406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x20002405 +) + type timespec struct { tv_sec int32 tv_nsec int32 @@ -128,6 +259,8 @@ type siginfo struct { si_errno int32 // below here is a union; si_addr is the only field we use si_addr uint32 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -182,3 +315,9 @@ type ucontext struct { uc_mcontext sigcontext uc_sigmask [4]uint32 } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid 
int32 +} diff --git a/src/runtime/defs_linux_ppc64.go b/src/runtime/defs_linux_ppc64.go index f438993721..e298833c95 100644 --- a/src/runtime/defs_linux_ppc64.go +++ b/src/runtime/defs_linux_ppc64.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -89,6 +91,135 @@ const ( _EPOLL_CTL_MOD = 0x3 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + 
_PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 
+ _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + _PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + //struct Sigset { // uint64 sig[1]; //}; @@ -127,6 +258,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint64 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -197,3 +330,9 @@ type ucontext struct { __unused [15]uint64 uc_mcontext sigcontext } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_ppc64le.go b/src/runtime/defs_linux_ppc64le.go index f438993721..0f5b5771d6 100644 --- a/src/runtime/defs_linux_ppc64le.go +++ b/src/runtime/defs_linux_ppc64le.go @@ -13,9 +13,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - _MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -89,6 +91,135 @@ const ( _EPOLL_CTL_MOD = 0x3 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + 
_PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + _PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + 
_PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x20002401 + _PERF_EVENT_IOC_ENABLE = 0x20002400 + _PERF_EVENT_IOC_ID = 0x40082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x8008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x80042409 + _PERF_EVENT_IOC_PERIOD = 0x80082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x20002402 + _PERF_EVENT_IOC_RESET = 0x20002403 + _PERF_EVENT_IOC_SET_BPF = 0x80042408 + _PERF_EVENT_IOC_SET_FILTER = 0x80082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x20002405 +) + //struct Sigset { // uint64 sig[1]; //}; @@ -127,6 +258,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint64 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -197,3 +330,9 @@ type ucontext struct { __unused [15]uint64 uc_mcontext sigcontext } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_linux_s390x.go b/src/runtime/defs_linux_s390x.go index 19b99b5bdf..165bb5a788 100644 --- a/src/runtime/defs_linux_s390x.go +++ b/src/runtime/defs_linux_s390x.go @@ -14,9 +14,11 @@ const ( _PROT_WRITE = 0x2 _PROT_EXEC = 0x4 - 
_MAP_ANON = 0x20 - _MAP_PRIVATE = 0x2 - _MAP_FIXED = 0x10 + _MAP_SHARED = 0x1 + _MAP_PRIVATE = 0x2 + _MAP_SHARED_VALIDATE = 0x3 + _MAP_FIXED = 0x10 + _MAP_ANON = 0x20 _MADV_DONTNEED = 0x4 _MADV_FREE = 0x8 @@ -90,6 +92,135 @@ const ( _EPOLL_CTL_MOD = 0x3 ) +// The replication is because constants could be different on different architecture +const ( + _PERF_TYPE_HARDWARE = 0x0 + _PERF_TYPE_SOFTWARE = 0x1 + _PERF_TYPE_TRACEPOINT = 0x2 + _PERF_TYPE_HW_CACHE = 0x3 + _PERF_TYPE_RAW = 0x4 + _PERF_TYPE_BREAKPOINT = 0x5 + + _PERF_COUNT_HW_CPU_CYCLES = 0x0 + _PERF_COUNT_HW_INSTRUCTIONS = 0x1 + _PERF_COUNT_HW_CACHE_REFERENCES = 0x2 + _PERF_COUNT_HW_CACHE_MISSES = 0x3 + _PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 0x4 + _PERF_COUNT_HW_BRANCH_MISSES = 0x5 + _PERF_COUNT_HW_BUS_CYCLES = 0x6 + _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 0x7 + _PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 0x8 + _PERF_COUNT_HW_REF_CPU_CYCLES = 0x9 + + _PERF_COUNT_HW_CACHE_L1D = 0x0 + _PERF_COUNT_HW_CACHE_L1I = 0x1 + _PERF_COUNT_HW_CACHE_LL = 0x2 + _PERF_COUNT_HW_CACHE_DTLB = 0x3 + _PERF_COUNT_HW_CACHE_ITLB = 0x4 + _PERF_COUNT_HW_CACHE_BPU = 0x5 + _PERF_COUNT_HW_CACHE_NODE = 0x6 + + _PERF_COUNT_HW_CACHE_OP_READ = 0x0 + _PERF_COUNT_HW_CACHE_OP_WRITE = 0x1 + _PERF_COUNT_HW_CACHE_OP_PREFETCH = 0x2 + + _PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0x0 + _PERF_COUNT_HW_CACHE_RESULT_MISS = 0x1 + + _PERF_COUNT_SW_CPU_CLOCK = 0x0 + _PERF_COUNT_SW_TASK_CLOCK = 0x1 + _PERF_COUNT_SW_PAGE_FAULTS = 0x2 + _PERF_COUNT_SW_CONTEXT_SWITCHES = 0x3 + _PERF_COUNT_SW_CPU_MIGRATIONS = 0x4 + _PERF_COUNT_SW_PAGE_FAULTS_MIN = 0x5 + _PERF_COUNT_SW_PAGE_FAULTS_MAJ = 0x6 + _PERF_COUNT_SW_ALIGNMENT_FAULTS = 0x7 + _PERF_COUNT_SW_EMULATION_FAULTS = 0x8 + _PERF_COUNT_SW_DUMMY = 0x9 + _PERF_COUNT_SW_BPF_OUTPUT = 0xa + + _PERF_SAMPLE_IP = 0x1 + _PERF_SAMPLE_TID = 0x2 + _PERF_SAMPLE_TIME = 0x4 + _PERF_SAMPLE_ADDR = 0x8 + _PERF_SAMPLE_READ = 0x10 + _PERF_SAMPLE_CALLCHAIN = 0x20 + _PERF_SAMPLE_ID = 0x40 + _PERF_SAMPLE_CPU = 0x80 + _PERF_SAMPLE_PERIOD = 0x100 + 
_PERF_SAMPLE_STREAM_ID = 0x200 + _PERF_SAMPLE_RAW = 0x400 + _PERF_SAMPLE_BRANCH_STACK = 0x800 + + _PERF_SAMPLE_BRANCH_USER = 0x1 + _PERF_SAMPLE_BRANCH_KERNEL = 0x2 + _PERF_SAMPLE_BRANCH_HV = 0x4 + _PERF_SAMPLE_BRANCH_ANY = 0x8 + _PERF_SAMPLE_BRANCH_ANY_CALL = 0x10 + _PERF_SAMPLE_BRANCH_ANY_RETURN = 0x20 + _PERF_SAMPLE_BRANCH_IND_CALL = 0x40 + _PERF_SAMPLE_BRANCH_ABORT_TX = 0x80 + _PERF_SAMPLE_BRANCH_IN_TX = 0x100 + _PERF_SAMPLE_BRANCH_NO_TX = 0x200 + _PERF_SAMPLE_BRANCH_COND = 0x400 + _PERF_SAMPLE_BRANCH_CALL_STACK = 0x800 + _PERF_SAMPLE_BRANCH_IND_JUMP = 0x1000 + _PERF_SAMPLE_BRANCH_CALL = 0x2000 + _PERF_SAMPLE_BRANCH_NO_FLAGS = 0x4000 + _PERF_SAMPLE_BRANCH_NO_CYCLES = 0x8000 + _PERF_SAMPLE_BRANCH_TYPE_SAVE = 0x10000 + + _PERF_FORMAT_TOTAL_TIME_ENABLED = 0x1 + _PERF_FORMAT_TOTAL_TIME_RUNNING = 0x2 + _PERF_FORMAT_ID = 0x4 + _PERF_FORMAT_GROUP = 0x8 + + _PERF_RECORD_MISC_EXACT_IP = 0x4000 + + _PERF_RECORD_MMAP = 0x1 + _PERF_RECORD_LOST = 0x2 + _PERF_RECORD_COMM = 0x3 + _PERF_RECORD_EXIT = 0x4 + _PERF_RECORD_THROTTLE = 0x5 + _PERF_RECORD_UNTHROTTLE = 0x6 + _PERF_RECORD_FORK = 0x7 + _PERF_RECORD_READ = 0x8 + _PERF_RECORD_SAMPLE = 0x9 + _PERF_RECORD_MMAP2 = 0xa + _PERF_RECORD_AUX = 0xb + _PERF_RECORD_ITRACE_START = 0xc + _PERF_RECORD_LOST_SAMPLES = 0xd + _PERF_RECORD_SWITCH = 0xe + _PERF_RECORD_SWITCH_CPU_WIDE = 0xf + _PERF_RECORD_NAMESPACES = 0x10 + + _PERF_CONTEXT_HV = -0x20 + _PERF_CONTEXT_KERNEL = -0x80 + _PERF_CONTEXT_USER = -0x200 + + _PERF_CONTEXT_GUEST = -0x800 + _PERF_CONTEXT_GUEST_KERNEL = -0x880 + _PERF_CONTEXT_GUEST_USER = -0xa00 + + _PERF_FLAG_FD_NO_GROUP = 0x1 + _PERF_FLAG_FD_OUTPUT = 0x2 + _PERF_FLAG_PID_CGROUP = 0x4 + _PERF_FLAG_FD_CLOEXEC = 0x8 + + _PERF_EVENT_IOC_DISABLE = 0x2401 + _PERF_EVENT_IOC_ENABLE = 0x2400 + _PERF_EVENT_IOC_ID = 0x80082407 + _PERF_EVENT_IOC_MODIFY_ATTRIBUTES = 0x4008240b + _PERF_EVENT_IOC_PAUSE_OUTPUT = 0x40042409 + _PERF_EVENT_IOC_PERIOD = 0x40082404 + _PERF_EVENT_IOC_QUERY_BPF = 0xc008240a + _PERF_EVENT_IOC_REFRESH = 0x2402 + 
_PERF_EVENT_IOC_RESET = 0x2403 + _PERF_EVENT_IOC_SET_BPF = 0x40042408 + _PERF_EVENT_IOC_SET_FILTER = 0x40082406 + _PERF_EVENT_IOC_SET_OUTPUT = 0x2405 +) + type timespec struct { tv_sec int64 tv_nsec int64 @@ -123,6 +254,8 @@ type siginfo struct { si_code int32 // below here is a union; si_addr is the only field we use si_addr uint64 + // si_fd is the field right after the union + si_fd int32 } type itimerval struct { @@ -164,3 +297,9 @@ type ucontext struct { uc_mcontext sigcontext uc_sigmask uint64 } + +// psu: only tested on amd64 +type fOwnerEx struct { + Type int32 + Pid int32 +} diff --git a/src/runtime/defs_perf_linux.go b/src/runtime/defs_perf_linux.go new file mode 100644 index 0000000000..fce76ccf16 --- /dev/null +++ b/src/runtime/defs_perf_linux.go @@ -0,0 +1,85 @@ +package runtime + +// Convert platform-agnostic pmu events to Linux perf events +var perfEventOpt = map[int32]struct { + // The lower-case "type" is the keyword in Go + Type uint32 // type of event + config uint64 // event +}{ + GO_COUNT_HW_CPU_CYCLES: {_PERF_TYPE_HARDWARE, _PERF_COUNT_HW_CPU_CYCLES}, + GO_COUNT_HW_INSTRUCTIONS: {_PERF_TYPE_HARDWARE, _PERF_COUNT_HW_INSTRUCTIONS}, + GO_COUNT_HW_CACHE_REFERENCES: {_PERF_TYPE_HARDWARE, _PERF_COUNT_HW_CACHE_REFERENCES}, + GO_COUNT_HW_CACHE_MISSES: {_PERF_TYPE_HARDWARE, _PERF_COUNT_HW_CACHE_MISSES}, + GO_COUNT_HW_CACHE_LL_READ_ACCESSES: {_PERF_TYPE_HW_CACHE, (_PERF_COUNT_HW_CACHE_LL) | (_PERF_COUNT_HW_CACHE_OP_READ << 8) | (_PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)}, + GO_COUNT_HW_CACHE_LL_READ_MISSES: {_PERF_TYPE_HW_CACHE, (_PERF_COUNT_HW_CACHE_LL) | (_PERF_COUNT_HW_CACHE_OP_READ << 8) | (_PERF_COUNT_HW_CACHE_RESULT_MISS << 16)}, + GO_COUNT_HW_RAW: {_PERF_TYPE_RAW, 0 /* will not be used */}, + // TODO: add more perf events +} + +type perfEventAttr struct { + // The lower-case "type" is the keyword in Go + Type uint32 + size uint32 + config uint64 + sample uint64 + sample_type uint64 + read_format uint64 + bits uint64 + wakeup uint32 + bp_type 
uint32 + ext1 uint64 + ext2 uint64 + branch_sample_type uint64 + sample_regs_user uint64 + sample_stack_user uint32 + clockid int32 + sample_regs_intr uint64 + aux_watermark uint32 + sample_max_stack uint16 + _ uint16 +} + +type perfEventMmapPage struct { + version uint32 + compat_version uint32 + lock uint32 + index uint32 + offset int64 + time_enabled uint64 + Time_running uint64 + capabilities uint64 + pmc_width uint16 + time_shift uint16 + time_mult uint32 + time_offset uint64 + time_zero uint64 + size uint32 + _ [948]uint8 + data_head uint64 + data_tail uint64 + data_offset uint64 + data_size uint64 + aux_head uint64 + aux_tail uint64 + aux_offset uint64 + aux_size uint64 +} + +type perfEventHeader struct { + // The lower-case "type" is the keyword in Go + Type uint32 + misc uint16 + size uint16 +} + +// The order where values are saved in a sample has to match the mmap ring buffer layout +type perfSampleData struct { + ip uint64 // if _PERF_SAMPLE_IP + pid uint32 // if _PERF_SAMPLE_TID + tid uint32 // if _PERF_SAMPLE_TID + addr uint64 // if _PERF_SAMPLE_ADDR + // TODO: More fields can be added in order if needed + + /*********** auxiliary fields ***********/ + isPreciseIP bool // whether the obtained ip is precise or not +} diff --git a/src/runtime/defs_pmu.go b/src/runtime/defs_pmu.go new file mode 100644 index 0000000000..450852c250 --- /dev/null +++ b/src/runtime/defs_pmu.go @@ -0,0 +1,29 @@ +package runtime + +// These Constants are platform agnostic and exposed to pprof. +// We use perfEventOpt to map these to the underlying OS and HW. 
+const ( + GO_COUNT_HW_CPU_CYCLES = 0x0 + GO_COUNT_HW_INSTRUCTIONS = 0x1 + GO_COUNT_HW_CACHE_REFERENCES = 0x2 + GO_COUNT_HW_CACHE_MISSES = 0x3 + GO_COUNT_HW_CACHE_LL_READ_ACCESSES = 0x4 + GO_COUNT_HW_CACHE_LL_READ_MISSES = 0x5 + GO_COUNT_HW_RAW = 0x6 + GO_COUNT_PMU_EVENTS_MAX = 0x7 +) + +type PMUEventAttr struct { + Period uint64 + RawEvent uint64 + PreciseIP uint8 + IsSampleIPIncluded bool + IsSampleThreadIDIncluded bool + IsSampleAddrIncluded bool + IsSampleCallchainIncluded bool + IsKernelIncluded bool + IsHvIncluded bool + IsIdleIncluded bool + IsCallchainKernelIncluded bool + IsCallchainUserIncluded bool +} diff --git a/src/runtime/mem_barrier.go b/src/runtime/mem_barrier.go new file mode 100644 index 0000000000..39035b3529 --- /dev/null +++ b/src/runtime/mem_barrier.go @@ -0,0 +1,13 @@ +package runtime + +// A full hardware memory barrier that prevents any memory access (read or write) +// from being moved and executed on the other side of the barrier +func mb() + +// A hardware memory barrier that prevents any memory read access from +// being moved and executed on the other side of the barrier +func rmb() + +// A hardware memory barrier that prevents any memory write access from +// being moved and executed on the other side of the barrier +func wmb() diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index d4a9bd4ff5..a55ad2a6c5 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -289,6 +290,12 @@ func getHugePageSize() uintptr { func osinit() { ncpu = getproccount() physHugePageSize = getHugePageSize() + + // following the same convention as other write once variables in this function and + // assuming that there exists a memory fence before anybody reads these values + setProcessPMUProfilerFptr = setProcessPMUProfiler + setThreadPMUProfilerFptr = setThreadPMUProfiler + sigprofPMUHandlerFptr = sigprofPMUHandler } var urandom_dev = 
[]byte("/dev/urandom\x00") @@ -452,3 +459,187 @@ func sysSigaction(sig uint32, new, old *sigactiont) { // rt_sigaction is implemented in assembly. //go:noescape func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32 + +func setProcessPMUProfiler(eventAttr *PMUEventAttr) { + if eventAttr != nil { + atomic.Cas(&cpuorpmuprofiler, _UNINSTALLED, _PMU_INSTALLED) + // Enable the Go signal handler if not enabled. + if atomic.Cas(&handlingSig[_SIGPROF], _UNINSTALLED, _PMU_INSTALLED) { + atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF)) + setsig(_SIGPROF, funcPC(sighandler)) + } + } else { + atomic.Cas(&cpuorpmuprofiler, _PMU_INSTALLED, _UNINSTALLED) + // If the Go signal handler should be disabled by default, + // disable it if it is enabled. + if !sigInstallGoHandler(_SIGPROF) { + if atomic.Cas(&handlingSig[_SIGPROF], _PMU_INSTALLED, _UNINSTALLED) { + setsig(_SIGPROF, atomic.Loaduintptr(&fwdSig[_SIGPROF])) + } + } + } +} + +func fcntl(fd int32, cmd, arg int) (r, err int) + +//go:noescape +func fcntl2(fd int32, cmd int, arg *fOwnerEx) (r, err int) + +func setThreadPMUProfiler(eventId int32, eventAttr *PMUEventAttr) { + _g_ := getg() + + if eventAttr == nil { + if _g_.m.eventAttrs[eventId] != nil { + // We need to disable the counter prior to closing the file descriptor + perfStopCounter(_g_.m.eventFds[eventId]) + closefd(_g_.m.eventFds[eventId]) + _g_.m.eventAttrs[eventId] = nil + } + if _g_.m.eventMmapBufs[eventId] != nil { + perfUnsetMmap(_g_.m.eventMmapBufs[eventId]) + _g_.m.eventMmapBufs[eventId] = nil + } + } else { + if _g_.m.eventAttrs[eventId] != nil { + // We need to disable the counter prior to closing the file descriptor + perfStopCounter(_g_.m.eventFds[eventId]) + closefd(_g_.m.eventFds[eventId]) + _g_.m.eventAttrs[eventId] = nil + } + if _g_.m.eventMmapBufs[eventId] != nil { + perfUnsetMmap(_g_.m.eventMmapBufs[eventId]) + _g_.m.eventMmapBufs[eventId] = nil + } + + var perfAttr perfEventAttr + perfAttrInit(eventId, eventAttr, &perfAttr) 
+ + fd, _, err := perfEventOpen(&perfAttr, 0, -1, -1, 0, 0 /* dummy */) + if err != 0 { + println("Linux perf event open failed") + return + } + + // create mmap buffer for this file + mmapBuf := perfSetMmap(fd) + if mmapBuf == nil { + closefd(fd) + println("Fail to set perf mmap") + return + } + + flag, _ := fcntl(fd, 0x3 /* F_GETFL */, 0) + _, err = fcntl(fd, 0x4 /* F_SETFL */, flag|0x2000 /* O_ASYNC */) + if err != 0 { + closefd(fd) + perfUnsetMmap(mmapBuf) + println("Failed to set notification for the PMU event") + return + } + + _, err = fcntl(fd, 0xa /* F_SETSIG */, _SIGPROF) + if err != 0 { + closefd(fd) + perfUnsetMmap(mmapBuf) + println("Failed to set signal for the PMU event") + return + } + + fOwnEx := fOwnerEx{0 /* F_OWNER_TID */, int32(gettid())} + _, err = fcntl2(fd, 0xf /* F_SETOWN_EX */, &fOwnEx) + if err != 0 { + closefd(fd) + perfUnsetMmap(mmapBuf) + println("Failed to set the owner of the perf event file") + return + } + + _g_.m.eventAttrs[eventId] = eventAttr + _g_.m.eventMmapBufs[eventId] = mmapBuf + _g_.m.eventFds[eventId] = fd + + if !perfResetCounter(fd) { + closefd(fd) + perfUnsetMmap(mmapBuf) + _g_.m.eventAttrs[eventId] = nil + _g_.m.eventMmapBufs[eventId] = nil + return + } + if !perfStartCounter(fd) { + closefd(fd) + perfUnsetMmap(mmapBuf) + _g_.m.eventAttrs[eventId] = nil + _g_.m.eventMmapBufs[eventId] = nil + return + } + } +} + +//go:nowritebarrierrec +func sigprofPMUHandler(info *siginfo, c *sigctxt, gp *g, _g_ *g) { + fd := info.si_fd + + if !perfStopCounter(fd) { + return + } + + var eventId int = -1 + for i := 0; i < GO_COUNT_PMU_EVENTS_MAX; i++ { + if _g_.m.eventFds[i] == fd && _g_.m.eventAttrs[i] != nil { + eventId = i + break + } + } + + if eventId != -1 { + mmapBuf := (*perfEventMmapPage)(_g_.m.eventMmapBufs[eventId]) + + head := mmapBuf.data_head + rmb() // on SMP-capable platforms, after reading the data_head value, user space should issue a memory barrier + + for { + tail := mmapBuf.data_tail + + remains := head - tail + if 
remains <= 0 { + break + } + + var hdr perfEventHeader + + // the reasons for passing 'mmapBuf.data_head' by value to functions + // perfSkipAll, perfReadHeader, perfRecordSample and perfSkipRecord: + // 1. it remains unchanged across these function calls + // 2. more importantly, avoid frequently reading it from the mmap ring buffer => avoid frequently calling rmb() + if remains < uint64(unsafe.Sizeof(hdr)) { + perfSkipAll(head, mmapBuf) + break + } + + if !perfReadHeader(head, mmapBuf, &hdr) { + println("Failed to read the mmap header") + break + } + + if hdr.Type == _PERF_RECORD_SAMPLE { + var sampleData perfSampleData + + sampleData.isPreciseIP = (hdr.misc & _PERF_RECORD_MISC_EXACT_IP) != 0 + + perfRecordSample(head, mmapBuf, _g_.m.eventAttrs[eventId], &sampleData) + + sigprofPMU(c.sigpc(), c.sigsp(), c.siglr(), gp, _g_.m, eventId /* , &sampleData */) + } else if hdr.size == 0 { + perfSkipAll(head, mmapBuf) + } else { + perfSkipRecord(head, mmapBuf, &hdr) + } + } + } else { // should never be taken + println("File descriptor ", fd, " not found in _g_.m.eventFds") + } + + if !perfStartCounter(fd) { + return + } +} diff --git a/src/runtime/perf_linux.go b/src/runtime/perf_linux.go new file mode 100644 index 0000000000..1e9779c674 --- /dev/null +++ b/src/runtime/perf_linux.go @@ -0,0 +1,221 @@ +package runtime + +import ( + "unsafe" +) + +func ioctl(fd int32, req, arg int) (r, err int) + +//go:noescape +func perfEventOpen(attr *perfEventAttr, pid, cpu, groupFd, flags, dummy int) (r int32, r2, err int) + +const perfDataPages = 2 // use 2^n data pages +var perfPageSize uint64 +var perfPageMask uint64 + +func perfAttrInit(eventId int32, eventAttr *PMUEventAttr, perfAttr *perfEventAttr) { + perfAttr.Type = perfEventOpt[eventId].Type + perfAttr.size = uint32(unsafe.Sizeof(*perfAttr)) + + if eventId == GO_COUNT_HW_RAW { + perfAttr.config = eventAttr.RawEvent + } else { + perfAttr.config = perfEventOpt[eventId].config + } + + perfAttr.sample = eventAttr.Period + 
if perfAttr.sample == 0 { + perfAttr.read_format = _PERF_FORMAT_TOTAL_TIME_ENABLED | _PERF_FORMAT_TOTAL_TIME_RUNNING + } + + if eventAttr.IsSampleIPIncluded { + perfAttr.sample_type = _PERF_SAMPLE_IP + } + if eventAttr.IsSampleAddrIncluded { + perfAttr.sample_type |= _PERF_SAMPLE_ADDR + } + if eventAttr.IsSampleCallchainIncluded { + perfAttr.sample_type |= _PERF_SAMPLE_CALLCHAIN + } + if eventAttr.IsSampleThreadIDIncluded { + perfAttr.sample_type |= _PERF_SAMPLE_TID + } + + perfAttr.bits = 1 // the counter is disabled and will be enabled later + perfAttr.bits |= uint64(eventAttr.PreciseIP) << 15 // precise ip + if !eventAttr.IsKernelIncluded { // don't count kernel + perfAttr.bits |= 1 << 5 + } + if !eventAttr.IsHvIncluded { // don't count hypervisor + perfAttr.bits |= 1 << 6 + } + if !eventAttr.IsIdleIncluded { // don't count when idle + perfAttr.bits |= 1 << 7 + } + if !eventAttr.IsCallchainKernelIncluded { + perfAttr.bits |= 1 << 21 + } + if !eventAttr.IsCallchainUserIncluded { + perfAttr.bits |= 1 << 22 + } + + perfAttr.wakeup = 1 // counter overflow notifications happen after wakeup_events samples +} + +func perfMmapInit() { + perfPageSize = uint64(physPageSize) + perfPageMask = perfPageSize*perfDataPages - 1 +} + +func perfMmapSize() uintptr { + if perfPageSize == 0 { + println("The perf page size has been unknown!") + } + return uintptr(perfPageSize * (perfDataPages + 1 /* metadata page */)) +} + +func perfSetMmap(fd int32) unsafe.Pointer { + if perfPageSize == 0 { + perfMmapInit() + } + + size := perfMmapSize() + mmapBuf, err := mmap(nil, size, _PROT_WRITE|_PROT_READ, _MAP_SHARED, fd, 0 /* page offset */) + if err != 0 { + return nil + } + + return mmapBuf +} + +func perfUnsetMmap(mmapBuf unsafe.Pointer) { + size := perfMmapSize() + munmap(mmapBuf, size) +} + +func perfSkipNBytes(head uint64, mmapBuf *perfEventMmapPage, n uint64) { + tail := mmapBuf.data_tail + + remains := head - tail + if n > remains { + n = remains + } + mmapBuf.data_tail += n +} + +func 
perfSkipRecord(head uint64, mmapBuf *perfEventMmapPage, hdr *perfEventHeader) { + if mmapBuf == nil { + return + } + + remains := uint64(hdr.size) - uint64(unsafe.Sizeof(*hdr)) + if remains > 0 { + perfSkipNBytes(head, mmapBuf, remains) + } +} + +func perfSkipAll(head uint64, mmapBuf *perfEventMmapPage) { + if mmapBuf == nil { + return + } + + tail := mmapBuf.data_tail + + remains := head - tail + if remains > 0 { + mmapBuf.data_tail += remains + } +} + +func perfReadNbytes(head uint64, mmapBuf *perfEventMmapPage, buf unsafe.Pointer, n uint64) bool { + if mmapBuf == nil { + return false + } + + // front of the circular data buffer + data := unsafe.Pointer(uintptr(unsafe.Pointer(mmapBuf)) + uintptr(perfPageSize)) + + tail := mmapBuf.data_tail + + // compute bytes available in the circular buffer + byteAvailable := head - tail + if n > byteAvailable { + return false + } + + // compute offset of tail in the circular buffer + tail &= perfPageMask + + bytesAtRight := (perfPageMask + 1) - tail + + // bytes to copy to the right of tail + var right uint64 + if bytesAtRight < n { + right = bytesAtRight + } else { + right = n + } + + // copy bytes from tail position + memmove(buf, unsafe.Pointer(uintptr(data)+uintptr(tail)), uintptr(right)) + + // if necessary, wrap and continue copy from left edge of buf + if n > right { + left := n - right + memmove(unsafe.Pointer(uintptr(buf)+uintptr(right)), data, uintptr(left)) + } + + // update tail after consuming n bytes + mmapBuf.data_tail += n + + return true +} + +func perfReadHeader(head uint64, mmapBuf *perfEventMmapPage, hdr *perfEventHeader) bool { + return perfReadNbytes(head, mmapBuf, unsafe.Pointer(hdr), uint64(unsafe.Sizeof(*hdr))) +} + +// The order where values are read has to match the mmap ring buffer layout +func perfRecordSample(head uint64, mmapBuf *perfEventMmapPage, eventAttr *PMUEventAttr, sampleData *perfSampleData) { + if eventAttr.IsSampleIPIncluded { + perfReadNbytes(head, mmapBuf, 
unsafe.Pointer(&(sampleData.ip)), uint64(unsafe.Sizeof(sampleData.ip))) + } + if eventAttr.IsSampleThreadIDIncluded { + perfReadNbytes(head, mmapBuf, unsafe.Pointer(&(sampleData.pid)), uint64(unsafe.Sizeof(sampleData.pid))) + perfReadNbytes(head, mmapBuf, unsafe.Pointer(&(sampleData.tid)), uint64(unsafe.Sizeof(sampleData.tid))) + } + if eventAttr.IsSampleAddrIncluded { + perfReadNbytes(head, mmapBuf, unsafe.Pointer(&(sampleData.addr)), uint64(unsafe.Sizeof(sampleData.addr))) + } +} + +func perfStartCounter(fd int32) bool { + _, err := ioctl(fd, _PERF_EVENT_IOC_ENABLE, 0) + if err != 0 { + println("Failed to enable the event count") + return false + } + return true +} + +func perfStopCounter(fd int32) bool { + _, err := ioctl(fd, _PERF_EVENT_IOC_DISABLE, 0) + if err != 0 { + println("Failed to disable the event count") + return false + } + return true +} + +func perfResetCounter(fd int32) bool { + _, err := ioctl(fd, _PERF_EVENT_IOC_RESET, 0) + if err != 0 { + println("Failed to reset the event count") + return false + } + return true +} + +func perfReadCounter(fd int32, val *uint64) bool { + return read(fd, unsafe.Pointer(val), int32(unsafe.Sizeof(*val))) != -1 +} diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 74cdd15cfb..2a78ca525e 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -79,6 +79,7 @@ import ( "io" "runtime" "sort" + "strconv" "strings" "sync" "text/tabwriter" @@ -721,11 +722,17 @@ type runtimeProfile []runtime.StackRecord func (p runtimeProfile) Len() int { return len(p) } func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() } +var mux sync.Mutex + var cpu struct { - sync.Mutex profiling bool done chan bool } +var pmu struct { + profiling bool + eventOn [runtime.GO_COUNT_PMU_EVENTS_MAX]bool + wg sync.WaitGroup +} // StartCPUProfile enables CPU profiling for the current process. // While profiling, the profile will be buffered and written to w. 
@@ -738,7 +745,14 @@ var cpu struct { // not to the one used by Go. To make it work, call os/signal.Notify // for syscall.SIGPROF, but note that doing so may break any profiling // being done by the main program. -func StartCPUProfile(w io.Writer) error { +func StartCPUProfile(w io.Writer, profileHz ...int) error { + mux.Lock() + defer mux.Unlock() + + if pmu.profiling { + return fmt.Errorf("Please disable pmu profiling before enabling cpu profiling") + } + // The runtime routines allow a variable profiling rate, // but in practice operating systems cannot trigger signals // at more than about 500 Hz, and our processing of the @@ -748,10 +762,11 @@ func StartCPUProfile(w io.Writer) error { // system, and a nice round number to make it easy to // convert sample counts to seconds. Instead of requiring // each client to specify the frequency, we hard code it. - const hz = 100 + var hz = 100 + if len(profileHz) != 0 { + hz = profileHz[0] + } - cpu.Lock() - defer cpu.Unlock() if cpu.done == nil { cpu.done = make(chan bool) } @@ -765,12 +780,192 @@ func StartCPUProfile(w io.Writer) error { return nil } +func StartPMUProfile(opts ...ProfilingOption) error { + if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { // enabling only on Linux AMD64 + return fmt.Errorf("not implemented") + } + mux.Lock() + defer mux.Unlock() + + if cpu.profiling { + return fmt.Errorf("Please disable cpu profiling before enabling pmu profiling") + } + + pmu.wg.Add(len(opts)) + // Double-check. 
+ if pmu.profiling { + return fmt.Errorf("pmu profiling already in use") + } + pmu.profiling = true + + for _, opt := range opts { + if err := opt.apply(); err != nil { + return err + } + } + return nil +} + +type PMUEventConfig struct { + Period int64 + RawEvent int64 + PreciseIP int8 + IsSampleIPIncluded bool + IsSampleThreadIDIncluded bool + IsSampleAddrIncluded bool + IsSampleCallchainIncluded bool + IsKernelIncluded bool + IsHvIncluded bool + IsIdleIncluded bool + IsCallchainKernelIncluded bool + IsCallchainUserIncluded bool + // TODO: IsLBREnabled bool +} + +func getPreciseIP(preciseIP int8) uint8 { + if preciseIP < 0 { + preciseIP = 0 + } else if preciseIP > 3 { + preciseIP = 3 + } + return uint8(preciseIP) +} + +func populatePMUProfiler(w io.Writer, eventConfig *PMUEventConfig, eventId int, eventName string) { + pmu.eventOn[eventId] = true + eventAttr := runtime.PMUEventAttr{ + Period: uint64(eventConfig.Period), + RawEvent: uint64(eventConfig.RawEvent), + PreciseIP: getPreciseIP(eventConfig.PreciseIP), + IsSampleIPIncluded: eventConfig.IsSampleIPIncluded, + IsSampleThreadIDIncluded: eventConfig.IsSampleThreadIDIncluded, + IsSampleAddrIncluded: eventConfig.IsSampleAddrIncluded, + IsSampleCallchainIncluded: eventConfig.IsSampleCallchainIncluded, + IsKernelIncluded: eventConfig.IsKernelIncluded, + IsHvIncluded: eventConfig.IsHvIncluded, + IsIdleIncluded: eventConfig.IsIdleIncluded, + IsCallchainKernelIncluded: eventConfig.IsCallchainKernelIncluded, + IsCallchainUserIncluded: eventConfig.IsCallchainUserIncluded, + } + runtime.SetPMUProfile(eventId, &eventAttr) + go pmuProfileWriter(w, eventId, eventName) +} + +func WithProfilingPMUCycles(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // Clamp period to something reasonable + if eventConfig.Period < 1000 { + 
eventConfig.Period = 1000 + } + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_CPU_CYCLES /* event name */, "cycles") + return nil + }) +} + +func WithProfilingPMUInstructions(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // Clamp period to something reasonable + if eventConfig.Period < 1000 { + eventConfig.Period = 1000 + } + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_INSTRUCTIONS /* event name */, "instructions") + return nil + }) +} + +func WithProfilingPMUCacheReferences(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // TODO: clamp period to something reasonable + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_CACHE_REFERENCES /* event name */, "cache references") + return nil + }) +} + +func WithProfilingPMUCacheMisses(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // TODO: clamp period to something reasonable + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_CACHE_MISSES /* event name */, "cache misses") + return nil + }) +} + +func WithProfilingPMUCacheLLReadAccesses(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // TODO: clamp period to something reasonable + + populatePMUProfiler(w, eventConfig /* event ID 
*/, runtime.GO_COUNT_HW_CACHE_LL_READ_ACCESSES /* event name */, "last-level cache read accesses") + return nil + }) +} + +func WithProfilingPMUCacheLLReadMisses(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // TODO: clamp period to something reasonable + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_CACHE_LL_READ_MISSES /* event name */, "last-level cache read misses") + return nil + }) +} + +func WithProfilingPMURaw(w io.Writer, eventConfig *PMUEventConfig) ProfilingOption { + return profilingOptionFunc(func() error { + if eventConfig.Period <= 0 { + return fmt.Errorf("Period should be > 0") + } + // TODO: create a table of standard clamp values + // TODO: clamp period to something reasonable + + if eventConfig.RawEvent < 0 { + return fmt.Errorf("RawEvent should be >= 0") + } + + populatePMUProfiler(w, eventConfig /* event ID */, runtime.GO_COUNT_HW_RAW /* event name */, "r"+strconv.FormatInt(eventConfig.RawEvent, 16)) + return nil + }) +} + +type ProfilingOption interface { + apply() error +} + +type profilingOptionFunc func() error + +func (pof profilingOptionFunc) apply() error { return pof() } + // readProfile, provided by the runtime, returns the next chunk of // binary CPU profiling stack trace data, blocking until data is available. // If profiling is turned off and all the profile data accumulated while it was // on has been returned, readProfile returns eof=true. // The caller must save the returned data and tags before calling readProfile again. 
-func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool) +func readProfile(eventIds ...int) (data []uint64, tags []unsafe.Pointer, eof bool) func profileWriter(w io.Writer) { b := newProfileBuilder(w) @@ -794,12 +989,32 @@ func profileWriter(w io.Writer) { cpu.done <- true } +func pmuProfileWriter(w io.Writer, eventId int, eventName string) { + b := newProfileBuilder(w) + var err error + for { + time.Sleep(100 * time.Millisecond) + data, tags, eof := readProfile(eventId) + if e := b.addPMUData(data, tags); e != nil && err == nil { + err = e + } + if eof { + break + } + } + if err != nil { + panic("runtime/pprof: converting profile: " + err.Error()) + } + b.pmuBuild(eventName) + pmu.wg.Done() +} + // StopCPUProfile stops the current CPU profile, if any. // StopCPUProfile only returns after all the writes for the // profile have completed. func StopCPUProfile() { - cpu.Lock() - defer cpu.Unlock() + mux.Lock() + defer mux.Unlock() if !cpu.profiling { return @@ -809,6 +1024,23 @@ func StopCPUProfile() { <-cpu.done } +func StopPMUProfile() { + mux.Lock() + defer mux.Unlock() + + if !pmu.profiling { + return + } + pmu.profiling = false + + for i := 0; i < runtime.GO_COUNT_PMU_EVENTS_MAX; i++ { + if pmu.eventOn[i] { + runtime.SetPMUProfile(i, nil) + } + } + pmu.wg.Wait() +} + // countBlock returns the number of records in the blocking profile. func countBlock() int { n, _ := runtime.BlockProfile(nil) diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 7864dd79ad..29b4c2ebe6 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -29,11 +29,12 @@ func funcPC(f interface{}) uintptr { // A profileBuilder writes a profile incrementally from a // stream of profile samples delivered by the runtime. 
type profileBuilder struct { - start time.Time - end time.Time - havePeriod bool - period int64 - m profMap + start time.Time + end time.Time + havePeriod bool + period int64 + isPMUEnabled bool + m profMap // encoding state w io.Writer @@ -312,26 +313,8 @@ func newProfileBuilder(w io.Writer) *profileBuilder { return b } -// addCPUData adds the CPU profiling data to the profile. -// The data must be a whole number of records, -// as delivered by the runtime. -func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error { - if !b.havePeriod { - // first record is period - if len(data) < 3 { - return fmt.Errorf("truncated profile") - } - if data[0] != 3 || data[2] == 0 { - return fmt.Errorf("malformed profile") - } - // data[2] is sampling rate in Hz. Convert to sampling - // period in nanoseconds. - b.period = 1e9 / int64(data[2]) - b.havePeriod = true - data = data[3:] - } - - // Parse CPU samples from the profile. +func (b *profileBuilder) addData(data []uint64, tags []unsafe.Pointer) error { + // Parse samples from the profile. // Each sample is 3+n uint64s: // data[0] = 3+n // data[1] = time stamp (ignored) @@ -374,19 +357,46 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error return nil } -// build completes and returns the constructed profile. -func (b *profileBuilder) build() { - b.end = time.Now() +// addCPUData adds the CPU profiling data to the profile. +// The data must be a whole number of records, +// as delivered by the runtime. +func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error { + if !b.havePeriod { + // first record is period + if len(data) < 3 { + return fmt.Errorf("truncated profile") + } + if data[0] != 3 || data[2] == 0 { + return fmt.Errorf("malformed profile") + } + // data[2] is sampling rate in Hz. Convert to sampling + // period in nanoseconds. 
+ b.period = 1e9 / int64(data[2]) + b.havePeriod = true + data = data[3:] + } - b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) - if b.havePeriod { // must be CPU profile - b.pbValueType(tagProfile_SampleType, "samples", "count") - b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds") - b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds()) - b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds") - b.pb.int64Opt(tagProfile_Period, b.period) + return b.addData(data, tags) +} + +func (b *profileBuilder) addPMUData(data []uint64, tags []unsafe.Pointer) error { + if !b.isPMUEnabled { + // first record is period + if len(data) < 3 { + return fmt.Errorf("truncated profile") + } + if data[0] != 3 || data[2] == 0 { + return fmt.Errorf("malformed profile") + } + b.isPMUEnabled = true + b.period = int64(data[2]) + data = data[3:] } + return b.addData(data, tags) +} + +func (b *profileBuilder) profileBuild() { values := []int64{0, 0} var locs []uint64 for e := b.m.all; e != nil; e = e.nextAll { @@ -435,6 +445,32 @@ func (b *profileBuilder) build() { b.zw.Close() } +// build completes and returns the constructed profile. 
+func (b *profileBuilder) build() { + b.end = time.Now() + + b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) + if b.havePeriod { // must be CPU profile + b.pbValueType(tagProfile_SampleType, "samples", "count") + b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds()) + b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_Period, b.period) + } + + b.profileBuild() +} + +func (b *profileBuilder) pmuBuild(eventName string) { + b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) + b.pbValueType(tagProfile_SampleType, "samples", "count") + b.pbValueType(tagProfile_SampleType, eventName, "count") + b.pbValueType(tagProfile_PeriodType, eventName, "count") + b.pb.int64Opt(tagProfile_Period, b.period) + + b.profileBuild() +} + // readMapping reads /proc/self/maps and writes mappings to b.pb. // It saves the address ranges of the mappings in b.mem for use // when emitting locations. diff --git a/src/runtime/proc.go b/src/runtime/proc.go index e9eca23138..50d081c85f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2125,6 +2125,9 @@ func gcstopm() { stopm() } +var setProcessPMUProfilerFptr func(eventAttr *PMUEventAttr) +var setThreadPMUProfilerFptr func(eventId int32, eventAttr *PMUEventAttr) + // Schedules gp to run on the current M. // If inheritTime is true, gp inherits the remaining time in the // current time slice. Otherwise, it starts a new time slice. @@ -2153,6 +2156,15 @@ func execute(gp *g, inheritTime bool) { setThreadCPUProfiler(hz) } + if setThreadPMUProfilerFptr != nil { + eventAttrs := sched.eventAttrs + for eventId := 0; eventId < GO_COUNT_PMU_EVENTS_MAX; eventId++ { + if _g_.m.eventAttrs[eventId] != eventAttrs[eventId] { + setThreadPMUProfilerFptr(int32(eventId), eventAttrs[eventId]) + } + } + } + if trace.enabled { // GoSysExit has to happen when we have a P, but before GoStart. // So we emit it here. 
@@ -3620,6 +3632,11 @@ var prof struct { hz int32 } +var pmuEvent [GO_COUNT_PMU_EVENTS_MAX]struct { + signalLock uint32 + eventAttr *PMUEventAttr +} + func _System() { _System() } func _ExternalCode() { _ExternalCode() } func _LostExternalCode() { _LostExternalCode() } @@ -3627,40 +3644,7 @@ func _GC() { _GC() } func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } func _VDSO() { _VDSO() } -// Counts SIGPROFs received while in atomic64 critical section, on mips{,le} -var lostAtomic64Count uint64 - -// Called if we receive a SIGPROF signal. -// Called by the signal handler, may run during STW. -//go:nowritebarrierrec -func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { - if prof.hz == 0 { - return - } - - // On mips{,le}, 64bit atomics are emulated with spinlocks, in - // runtime/internal/atomic. If SIGPROF arrives while the program is inside - // the critical section, it creates a deadlock (when writing the sample). - // As a workaround, create a counter of SIGPROFs while in critical section - // to store the count, and pass it to sigprof.add() later when SIGPROF is - // received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc). - if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" { - if f := findfunc(pc); f.valid() { - if hasPrefix(funcname(f), "runtime/internal/atomic") { - lostAtomic64Count++ - return - } - } - } - - // Profiling runs concurrently with GC, so it must not allocate. - // Set a trap in case the code does allocate. - // Note that on windows, one thread takes profiles of all the - // other threads, so mp is usually not getg().m. - // In fact mp may not even be stopped. - // See golang.org/issue/17165. - getg().m.mallocing++ - +func stackUnwinding(pc, sp, lr uintptr, gp *g, mp *m, stk []uintptr) int { // Define that a "user g" is a user-created goroutine, and a "system g" // is one that is m->g0 or m->gsignal. 
// @@ -3730,7 +3714,6 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) || (mp != nil && mp.vdsoSP != 0) { traceback = false } - var stk [maxCPUProfStack]uintptr n := 0 if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { cgoOff := 0 @@ -3785,7 +3768,43 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { } } } + return n +} + +// Counts SIGPROFs received while in atomic64 critical section, on mips{,le} +var lostAtomic64Count uint64 + +// Called if we receive a SIGPROF signal and prof is enabled. +// Called by the signal handler, may run during STW. +//go:nowritebarrierrec +func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { + if prof.hz == 0 { + return + } + // On mips{,le}, 64bit atomics are emulated with spinlocks, in + // runtime/internal/atomic. If SIGPROF arrives while the program is inside + // the critical section, it creates a deadlock (when writing the sample). + // As a workaround, create a counter of SIGPROFs while in critical section + // to store the count, and pass it to sigprof.add() later when SIGPROF is + // received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc). + if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" { + if f := findfunc(pc); f.valid() { + if hasPrefix(funcname(f), "runtime/internal/atomic") { + lostAtomic64Count++ // lostAtomic64Count[event]++ + return + } + } + } + // Profiling runs concurrently with GC, so it must not allocate. + // Set a trap in case the code does allocate. + // Note that on windows, one thread takes profiles of all the + // other threads, so mp is usually not getg().m. + // In fact mp may not even be stopped. + // See golang.org/issue/17165. 
+ getg().m.mallocing++ + var stk [maxCPUProfStack]uintptr + n := stackUnwinding(pc, sp, lr, gp, mp, stk[:]) if prof.hz != 0 { if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm") && lostAtomic64Count > 0 { cpuprof.addLostAtomic64(lostAtomic64Count) @@ -3796,6 +3815,37 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { getg().m.mallocing-- } +var lostPMUAtomic64Count [GO_COUNT_PMU_EVENTS_MAX]uint64 + +// Called if we receive a SIGPROF signal and PMU is enabled. +// Called by the signal handler, may run during STW. +//go:nowritebarrierrec +func sigprofPMU(pc, sp, lr uintptr, gp *g, mp *m, eventId int) { + if pmuEvent[eventId].eventAttr == nil { + return + } + if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" { + if f := findfunc(pc); f.valid() { + if hasPrefix(funcname(f), "runtime/internal/atomic") { + lostPMUAtomic64Count[eventId]++ + return + } + } + } + + getg().m.mallocing++ + var stk [maxCPUProfStack]uintptr + n := stackUnwinding(pc, sp, lr, gp, mp, stk[:]) + if pmuEvent[eventId].eventAttr != nil { + if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm") && lostPMUAtomic64Count[eventId] > 0 { + pmuprof[eventId].addLostAtomic64(lostPMUAtomic64Count[eventId], eventId) + lostPMUAtomic64Count[eventId] = 0 + } + pmuprof[eventId].add(gp, stk[:n], eventId) + } + getg().m.mallocing-- +} + // If the signal handler receives a SIGPROF signal on a non-Go thread, // it tries to collect a traceback into sigprofCallers. // sigprofCallersUse is set to non-zero while sigprofCallers holds a traceback. 
@@ -3820,6 +3870,20 @@ func sigprofNonGo() { atomic.Store(&sigprofCallersUse, 0) } +//go:nosplit +//go:nowritebarrierrec +func sigprofPMUNonGo(eventId int) { + if pmuEvent[eventId].eventAttr != nil { + n := 0 + for n < len(sigprofCallers) && sigprofCallers[n] != 0 { + n++ + } + pmuprof[eventId].addNonGo(sigprofCallers[:n], eventId) + } + + atomic.Store(&sigprofCallersUse, 0) +} + // sigprofNonGoPC is called when a profiling signal arrived on a // non-Go thread and we have a single PC value, not a stack trace. // g is nil, and what we can do is very limited. @@ -3835,6 +3899,18 @@ func sigprofNonGoPC(pc uintptr) { } } +//go:nosplit +//go:nowritebarrierrec +func sigprofPMUNonGoPC(pc uintptr, eventId int) { + if pmuEvent[eventId].eventAttr != nil { + stk := []uintptr{ + pc, + funcPC(_ExternalCode) + sys.PCQuantum, + } + pmuprof[eventId].addNonGo(stk, eventId) + } +} + // Reports whether a function will set the SP // to an absolute value. Important that // we don't traceback when these are at the bottom @@ -3897,6 +3973,43 @@ func setcpuprofilerate(hz int32) { _g_.m.locks-- } +func setpmuprofile(eventId int32, eventAttr *PMUEventAttr) { + // setProcessPMUProfilerFptr and setThreadPMUProfilerFptr are write once variables. + // Hence, there cannot be any race from checking non-nil to invoking them. + if setProcessPMUProfilerFptr == nil || setThreadPMUProfilerFptr == nil { + return + } + + // Disable preemption, otherwise we can be rescheduled to another thread + // that has profiling enabled. + _g_ := getg() + _g_.m.locks++ + + // Stop profiler on this thread so that it is safe to lock pmuEvent[eventId]. + // if a profiling signal came in while we had pmuEvent[eventId] locked, + // it would deadlock.
+ setThreadPMUProfilerFptr(eventId, nil) + + for !atomic.Cas(&pmuEvent[eventId].signalLock, 0, 1) { + osyield() + } + if pmuEvent[eventId].eventAttr != eventAttr { + setProcessPMUProfilerFptr(eventAttr) + pmuEvent[eventId].eventAttr = eventAttr + } + atomic.Store(&pmuEvent[eventId].signalLock, 0) + + lock(&sched.lock) // don't know why we lock scheduler, simply following the code pattern in prof + sched.eventAttrs[eventId] = eventAttr + unlock(&sched.lock) + + if eventAttr != nil { + setThreadPMUProfilerFptr(eventId, eventAttr) + } + + _g_.m.locks-- +} + // init initializes pp, which may be a freshly allocated p or a // previously destroyed p, and transitions it to status _Pgcstop. func (pp *p) init(id int32) { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index bc5b48222b..d86ff7c498 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -455,24 +455,28 @@ type m struct { divmod uint32 // div/mod denominator for arm - known to liblink // Fields not known to debuggers. 
- procid uint64 // for debuggers, but offset not hard-coded - gsignal *g // signal-handling g - goSigStack gsignalStack // Go-allocated signal handling stack - sigmask sigset // storage for saved signal mask - tls [6]uintptr // thread-local storage (for x86 extern register) - mstartfn func() - curg *g // current running goroutine - caughtsig guintptr // goroutine running during fatal signal - p puintptr // attached p for executing go code (nil if not executing go code) - nextp puintptr - oldp puintptr // the p that was attached before executing a syscall - id int64 - mallocing int32 - throwing int32 - preemptoff string // if != "", keep curg running on this m - locks int32 - dying int32 - profilehz int32 + procid uint64 // for debuggers, but offset not hard-coded + gsignal *g // signal-handling g + goSigStack gsignalStack // Go-allocated signal handling stack + sigmask sigset // storage for saved signal mask + tls [6]uintptr // thread-local storage (for x86 extern register) + mstartfn func() + curg *g // current running goroutine + caughtsig guintptr // goroutine running during fatal signal + p puintptr // attached p for executing go code (nil if not executing go code) + nextp puintptr + oldp puintptr // the p that was attached before executing a syscall + id int64 + mallocing int32 + throwing int32 + preemptoff string // if != "", keep curg running on this m + locks int32 + dying int32 + profilehz int32 + eventFds [GO_COUNT_PMU_EVENTS_MAX]int32 + eventAttrs [GO_COUNT_PMU_EVENTS_MAX]*PMUEventAttr + // eventMmapBufs [GO_COUNT_PMU_EVENTS_MAX]*perfEventMmapPage + eventMmapBufs [GO_COUNT_PMU_EVENTS_MAX]unsafe.Pointer spinning bool // m is out of work and is actively looking for work blocked bool // m is blocked on a note newSigstack bool // minit on C thread called sigaltstack @@ -673,8 +677,8 @@ type schedt struct { safePointWait int32 safePointNote note - profilehz int32 // cpu profiling rate - + profilehz int32 // cpu profiling rate + eventAttrs 
[GO_COUNT_PMU_EVENTS_MAX]*PMUEventAttr procresizetime int64 // nanotime() of last change to gomaxprocs totaltime int64 // ∫gomaxprocs dt up to procresizetime } diff --git a/src/runtime/signal_sighandler.go b/src/runtime/signal_sighandler.go index bec4653218..d9456e9ba2 100644 --- a/src/runtime/signal_sighandler.go +++ b/src/runtime/signal_sighandler.go @@ -7,6 +7,7 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) @@ -19,6 +20,8 @@ var crashing int32 // suppressed. var testSigtrap func(info *siginfo, ctxt *sigctxt, gp *g) bool +var sigprofPMUHandlerFptr func(info *siginfo, c *sigctxt, gp *g, _g_ *g) + // sighandler is invoked when a signal occurs. The global g will be // set to a gsignal goroutine and we will be running on the alternate // signal stack. The parameter g will be the value of the global g @@ -35,7 +38,12 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { c := &sigctxt{info, ctxt} if sig == _SIGPROF { - sigprof(c.sigpc(), c.sigsp(), c.siglr(), gp, _g_.m) + state := atomic.Load(&cpuorpmuprofiler) + if state == _ITIMER_INSTALLED { + sigprof(c.sigpc(), c.sigsp(), c.siglr(), gp, _g_.m) + } else if state == _PMU_INSTALLED { + sigprofPMUHandlerFptr(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp, _g_) + } return } diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index 1dd56989b4..9e3de40954 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -38,6 +38,12 @@ const ( _SIG_IGN uintptr = 1 ) +const ( + _UNINSTALLED = 0 + _ITIMER_INSTALLED = 1 + _PMU_INSTALLED = 2 +) + // Stores the signal handlers registered before Go installed its own. // These signal handlers will be invoked in cases where Go doesn't want to // handle a particular signal (e.g., signal occurred on a non-Go thread). @@ -229,22 +235,26 @@ func clearSignalHandlers() { } } +var cpuorpmuprofiler uint32 + // setProcessCPUProfiler is called when the profiling timer changes. // It is called with prof.lock held. 
hz is the new timer, and is 0 if // profiling is being disabled. Enable or disable the signal as // required for -buildmode=c-archive. func setProcessCPUProfiler(hz int32) { if hz != 0 { + atomic.Cas(&cpuorpmuprofiler, _UNINSTALLED, _ITIMER_INSTALLED) // Enable the Go signal handler if not enabled. - if atomic.Cas(&handlingSig[_SIGPROF], 0, 1) { + if atomic.Cas(&handlingSig[_SIGPROF], _UNINSTALLED, _ITIMER_INSTALLED) { atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF)) setsig(_SIGPROF, funcPC(sighandler)) } } else { + atomic.Cas(&cpuorpmuprofiler, _ITIMER_INSTALLED, _UNINSTALLED) // If the Go signal handler should be disabled by default, // disable it if it is enabled. if !sigInstallGoHandler(_SIGPROF) { - if atomic.Cas(&handlingSig[_SIGPROF], 1, 0) { + if atomic.Cas(&handlingSig[_SIGPROF], _ITIMER_INSTALLED, _UNINSTALLED) { setsig(_SIGPROF, atomic.Loaduintptr(&fwdSig[_SIGPROF])) } } diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s index 72c43bd9da..5b5e85c067 100644 --- a/src/runtime/sys_linux_386.s +++ b/src/runtime/sys_linux_386.s @@ -33,6 +33,7 @@ #define SYS_access 33 #define SYS_kill 37 #define SYS_brk 45 +#define SYS_ioctl 54 #define SYS_fcntl 55 #define SYS_munmap 91 #define SYS_socketcall 102 @@ -58,6 +59,7 @@ #define SYS_clock_gettime 265 #define SYS_tgkill 270 #define SYS_epoll_create1 329 +#define SYS_perf_event_open 336 TEXT runtime·exit(SB),NOSPLIT,$0 MOVL $SYS_exit_group, AX @@ -734,3 +736,86 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-4 INVOKE_SYSCALL MOVL AX, ret+0(FP) RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-36 + MOVL attr+0(FP), BX + MOVL pid+4(FP), CX + MOVL cpu+8(FP), DX + MOVL groupFd+12(FP), SI + MOVL flags+16(FP), DI + MOVL dummy+20(FP), BP + MOVL $SYS_perf_event_open, AX + INVOKE_SYSCALL + CMPL AX, $0xfffff001 + JLS ok + MOVL $-1, r+24(FP) + MOVL $0, r2+28(FP) + NEGL AX + MOVL AX, err+32(FP) + RET +ok: + MOVL AX, r+24(FP) + MOVL DX, r2+28(FP) + MOVL $0, err+32(FP) + RET + +// psu: not tested 
+TEXT runtime·ioctl(SB),NOSPLIT,$0-20 + MOVL fd+0(FP), BX + MOVL req+4(FP), CX + MOVL arg+8(FP), DX + MOVL $0, SI + MOVL $0, DI + MOVL $SYS_ioctl, AX + INVOKE_SYSCALL + CMPL AX, $0xfffff001 + JLS ok + MOVL $-1, r+12(FP) + NEGL AX + MOVL AX, err+16(FP) + RET +ok: + MOVL AX, r+12(FP) + MOVL $0, err+16(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-20 + MOVL fd+0(FP), BX + MOVL cmd+4(FP), CX + MOVL arg+8(FP), DX + MOVL $0, SI + MOVL $0, DI + MOVL $SYS_fcntl, AX + INVOKE_SYSCALL + CMPL AX, $0xfffff001 + JLS ok + MOVL $-1, r+12(FP) + NEGL AX + MOVL AX, err+16(FP) + RET +ok: + MOVL AX, r+12(FP) + MOVL $0, err+16(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-20 + MOVL fd+0(FP), BX + MOVL cmd+4(FP), CX + MOVL arg+8(FP), DX + MOVL $0, SI + MOVL $0, DI + MOVL $SYS_fcntl, AX + INVOKE_SYSCALL + CMPL AX, $0xfffff001 + JLS ok + MOVL $-1, r+12(FP) + NEGL AX + MOVL AX, err+16(FP) + RET +ok: + MOVL AX, r+12(FP) + MOVL $0, err+16(FP) + RET diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 5c300f553d..40a2bd1f7f 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -21,6 +21,7 @@ #define SYS_rt_sigaction 13 #define SYS_rt_sigprocmask 14 #define SYS_rt_sigreturn 15 +#define SYS_ioctl 16 #define SYS_sched_yield 24 #define SYS_mincore 27 #define SYS_madvise 28 @@ -46,6 +47,7 @@ #define SYS_faccessat 269 #define SYS_epoll_pwait 281 #define SYS_epoll_create1 291 +#define SYS_perf_event_open 298 TEXT runtime·exit(SB),NOSPLIT,$0-4 MOVL code+0(FP), DI @@ -723,3 +725,85 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 SYSCALL MOVQ AX, ret+0(FP) RET + +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-72 + MOVQ attr+0(FP), DI + MOVQ pid+8(FP), SI + MOVQ cpu+16(FP), DX + MOVQ groupFd+24(FP), R10 + MOVQ flags+32(FP), R8 + MOVQ dummy+40(FP), R9 + MOVQ $SYS_perf_event_open, AX + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS ok + MOVL $-1, r+48(FP) + MOVQ $0, r2+56(FP) + NEGQ AX + MOVQ AX, err+64(FP) + RET +ok: + MOVL 
AX, r+48(FP) + MOVQ DX, r2+56(FP) + MOVQ $0, err+64(FP) + RET + +TEXT runtime·ioctl(SB),NOSPLIT,$0-40 + MOVL fd+0(FP), DI + MOVQ req+8(FP), SI + MOVQ arg+16(FP), DX + MOVQ $0, R10 + MOVQ $0, R8 + MOVQ $0, R9 + MOVQ $SYS_ioctl, AX + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS ok + MOVQ $-1, r+24(FP) + NEGQ AX + MOVQ AX, err+32(FP) + RET +ok: + MOVQ AX, r+24(FP) + MOVQ $0, err+32(FP) + RET + +TEXT runtime·fcntl(SB),NOSPLIT,$0-40 + MOVL fd+0(FP), DI + MOVQ cmd+8(FP), SI + MOVQ arg+16(FP), DX + MOVQ $0, R10 + MOVQ $0, R8 + MOVQ $0, R9 + MOVQ $SYS_fcntl, AX + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS ok + MOVQ $-1, r+24(FP) + NEGQ AX + MOVQ AX, err+32(FP) + RET +ok: + MOVQ AX, r+24(FP) + MOVQ $0, err+32(FP) + RET + +TEXT runtime·fcntl2(SB),NOSPLIT,$0-40 + MOVL fd+0(FP), DI + MOVQ cmd+8(FP), SI + MOVQ arg+16(FP), DX + MOVQ $0, R10 + MOVQ $0, R8 + MOVQ $0, R9 + MOVQ $SYS_fcntl, AX + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS ok + MOVQ $-1, r+24(FP) + NEGQ AX + MOVQ AX, err+32(FP) + RET +ok: + MOVQ AX, r+24(FP) + MOVQ $0, err+32(FP) + RET diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s index 9c7398451c..72cbfd2030 100644 --- a/src/runtime/sys_linux_arm.s +++ b/src/runtime/sys_linux_arm.s @@ -45,11 +45,13 @@ #define SYS_epoll_ctl (SYS_BASE + 251) #define SYS_epoll_wait (SYS_BASE + 252) #define SYS_epoll_create1 (SYS_BASE + 357) +#define SYS_ioctl (SYS_BASE + 54) #define SYS_fcntl (SYS_BASE + 55) #define SYS_access (SYS_BASE + 33) #define SYS_connect (SYS_BASE + 283) #define SYS_socket (SYS_BASE + 281) #define SYS_brk (SYS_BASE + 45) +#define SYS_perf_event_open (SYS_BASE + 364) #define ARM_BASE (SYS_BASE + 0x0f0000) @@ -609,3 +611,102 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-4 TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-36 + MOVW attr+0(FP), R0 + MOVW pid+4(FP), R1 + MOVW cpu+8(FP), R2 + MOVW groupFd+12(FP), R3 + MOVW flags+16(FP), R4 + MOVW dummy+20(FP), R5 + MOVW
$SYS_perf_event_open, R7 + SWI $0 + MOVW $0xfffff001, R6 + CMP R6, R0 + BLS ok + MOVW $-1, R1 + MOVW R1, r+24(FP) + MOVW $0, R2 + MOVW R2, r2+28(FP) + RSB $0, R0, R0 + MOVW R0, err+32(FP) + RET +ok: + MOVW R0, r+24(FP) + MOVW R1, r2+28(FP) + MOVW $0, R0 + MOVW R0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·ioctl(SB),NOSPLIT,$0-20 + MOVW fd+0(FP), R0 + MOVW req+4(FP), R1 + MOVW arg+8(FP), R2 + MOVW $0, R3 + MOVW $0, R4 + MOVW $0, R5 + MOVW $SYS_ioctl, R7 + SWI $0 + MOVW $0xfffff001, R1 + CMP R1, R0 + BLS ok + MOVW $-1, R1 + MOVW R1, r+12(FP) + RSB $0, R0, R0 + MOVW R0, err+16(FP) + RET +ok: + MOVW R0, r+12(FP) + MOVW $0, R0 + MOVW R0, err+16(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-20 + MOVW fd+0(FP), R0 + MOVW cmd+4(FP), R1 + MOVW arg+8(FP), R2 + MOVW $0, R3 + MOVW $0, R4 + MOVW $0, R5 + MOVW $SYS_fcntl, R7 + SWI $0 + MOVW $0xfffff001, R1 + CMP R1, R0 + BLS ok + MOVW $-1, R1 + MOVW R1, r+12(FP) + RSB $0, R0, R0 + MOVW R0, err+16(FP) + RET +ok: + MOVW R0, r+12(FP) + MOVW $0, R0 + MOVW R0, err+16(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-20 + MOVW fd+0(FP), R0 + MOVW cmd+4(FP), R1 + MOVW arg+8(FP), R2 + MOVW $0, R3 + MOVW $0, R4 + MOVW $0, R5 + MOVW $SYS_fcntl, R7 + SWI $0 + MOVW $0xfffff001, R1 + CMP R1, R0 + BLS ok + MOVW $-1, R1 + MOVW R1, r+12(FP) + RSB $0, R0, R0 + MOVW R0, err+16(FP) + RET +ok: + MOVW R0, r+12(FP) + MOVW $0, R0 + MOVW R0, err+16(FP) + RET diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s index 321d74254c..2af3c8a0d1 100644 --- a/src/runtime/sys_linux_arm64.s +++ b/src/runtime/sys_linux_arm64.s @@ -21,6 +21,7 @@ #define SYS_openat 56 #define SYS_close 57 #define SYS_fcntl 25 +#define SYS_ioctl 29 #define SYS_nanosleep 101 #define SYS_mmap 222 #define SYS_munmap 215 @@ -48,6 +49,7 @@ #define SYS_socket 198 #define SYS_connect 203 #define SYS_brk 214 +#define SYS_perf_event_open 241 TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 MOVW code+0(FP), R0 @@ -602,3 +604,93 
@@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-72 + MOVD attr+0(FP), R0 + MOVD pid+8(FP), R1 + MOVD cpu+16(FP), R2 + MOVD groupFd+24(FP), R3 + MOVD flags+32(FP), R4 + MOVD dummy+40(FP), R5 + MOVD $SYS_perf_event_open, R8 + SVC + CMN $4095, R0 + BCC ok + MOVD $-1, R4 + MOVW R4, r+48(FP) + MOVD ZR, r2+56(FP) + NEG R0, R0 + MOVD R0, err+64(FP) + RET +ok: + MOVW R0, r+48(FP) + MOVD R1, r2+56(FP) + MOVD ZR, err+64(FP) + RET + +// psu: not tested +TEXT runtime·ioctl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R0 + MOVD req+8(FP), R1 + MOVD arg+16(FP), R2 + MOVD $0, R3 + MOVD $0, R4 + MOVD $0, R5 + MOVD $SYS_ioctl, R8 + SVC + CMN $4095, R0 + BCC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + NEG R0, R0 + MOVD R0, err+32(FP) + RET +ok: + MOVD R0, r+24(FP) + MOVD ZR, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R0 + MOVD cmd+8(FP), R1 + MOVD arg+16(FP), R2 + MOVD $0, R3 + MOVD $0, R4 + MOVD $0, R5 + MOVD $SYS_fcntl, R8 + SVC + CMN $4095, R0 + BCC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + NEG R0, R0 + MOVD R0, err+32(FP) + RET +ok: + MOVD R0, r+24(FP) + MOVD ZR, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R0 + MOVD cmd+8(FP), R1 + MOVD arg+16(FP), R2 + MOVD $0, R3 + MOVD $0, R4 + MOVD $0, R5 + MOVD $SYS_fcntl, R8 + SVC + CMN $4095, R0 + BCC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + NEG R0, R0 + MOVD R0, err+32(FP) + RET +ok: + MOVD R0, r+24(FP) + MOVD ZR, err+32(FP) + RET diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s index 33ed1050c2..9580812277 100644 --- a/src/runtime/sys_linux_mips64x.s +++ b/src/runtime/sys_linux_mips64x.s @@ -31,6 +31,7 @@ #define SYS_rt_sigreturn 5211 #define SYS_rt_sigaction 5013 #define SYS_rt_sigprocmask 5014 +#define SYS_ioctl 5015 #define SYS_sigaltstack 5129 #define SYS_madvise 5027 #define SYS_mincore 5026 @@ -46,6 +47,8 @@ #define
SYS_clock_gettime 5222 #define SYS_epoll_create1 5285 #define SYS_brk 5012 +#define SYS_perf_event_open 5292 + TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 MOVW code+0(FP), R4 @@ -477,3 +480,85 @@ TEXT runtime·socket(SB),$0-20 MOVV R0, 2(R0) // unimplemented, only needed for android; declared in stubs_linux.go MOVW R0, ret+16(FP) // for vet RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-72 + MOVV attr+0(FP), R4 + MOVV pid+8(FP), R5 + MOVV cpu+16(FP), R6 + MOVV groupFd+24(FP), R7 + MOVV flags+32(FP), R8 + MOVV dummy+40(FP), R9 + MOVV $SYS_perf_event_open, R2 + SYSCALL + BEQ R7, ok + MOVV $-1, R1 + MOVV R1, r+48(FP) + MOVV R0, r2+56(FP) + MOVV R2, err+64(FP) + RET +ok: + MOVV R2, r+48(FP) + MOVV R3, r2+56(FP) + MOVV R0, err+64(FP) + RET + +// psu: not tested +TEXT runtime·ioctl(SB),NOSPLIT,$0-40 + MOVV fd+0(FP), R4 + MOVV req+8(FP), R5 + MOVV arg+16(FP), R6 + MOVV R0, R7 + MOVV R0, R8 + MOVV R0, R9 + MOVV $SYS_ioctl, R2 + SYSCALL + BEQ R7, ok + MOVV $-1, R1 + MOVV R1, r+24(FP) + MOVV R2, err+32(FP) + RET +ok: + MOVV R2, r+24(FP) + MOVV R0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-40 + MOVV fd+0(FP), R4 + MOVV cmd+8(FP), R5 + MOVV arg+16(FP), R6 + MOVV R0, R7 + MOVV R0, R8 + MOVV R0, R9 + MOVV $SYS_fcntl, R2 + SYSCALL + BEQ R7, ok + MOVV $-1, R1 + MOVV R1, r+24(FP) + MOVV R2, err+32(FP) + RET +ok: + MOVV R2, r+24(FP) + MOVV R0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-40 + MOVV fd+0(FP), R4 + MOVV cmd+8(FP), R5 + MOVV arg+16(FP), R6 + MOVV R0, R7 + MOVV R0, R8 + MOVV R0, R9 + MOVV $SYS_fcntl, R2 + SYSCALL + BEQ R7, ok + MOVV $-1, R1 + MOVV R1, r+24(FP) + MOVV R2, err+32(FP) + RET +ok: + MOVV R2, r+24(FP) + MOVV R0, err+32(FP) + RET diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s index 13d23156bd..8cb7fa91ce 100644 --- a/src/runtime/sys_linux_ppc64x.s +++ b/src/runtime/sys_linux_ppc64x.s @@ -22,6 +22,7 @@ #define SYS_getpid 20 #define SYS_kill 37 #define SYS_brk 45
+#define SYS_ioctl 54 #define SYS_fcntl 55 #define SYS_mmap 90 #define SYS_munmap 91 @@ -45,6 +46,7 @@ #define SYS_clock_gettime 246 #define SYS_tgkill 250 #define SYS_epoll_create1 315 +#define SYS_perf_event_open 319 TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 MOVW code+0(FP), R3 @@ -634,3 +636,85 @@ TEXT runtime·socket(SB),$0-20 MOVD R0, 0(R0) // unimplemented, only needed for android; declared in stubs_linux.go MOVW R0, ret+16(FP) // for vet RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-72 + MOVD attr+0(FP), R3 + MOVD pid+8(FP), R4 + MOVD cpu+16(FP), R5 + MOVD groupFd+24(FP), R6 + MOVD flags+32(FP), R7 + MOVD dummy+40(FP), R8 + MOVD $SYS_perf_event_open, R9 + SYSCALL R9 + BVC ok + MOVD $-1, R4 + MOVW R4, r+48(FP) + MOVD R0, r2+56(FP) + MOVD R3, err+64(FP) + RET +ok: + MOVW R3, r+48(FP) + MOVD R4, r2+56(FP) + MOVD R0, err+64(FP) + RET + +// psu: not tested +TEXT runtime·ioctl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R3 + MOVD req+8(FP), R4 + MOVD arg+16(FP), R5 + MOVD R0, R6 + MOVD R0, R7 + MOVD R0, R8 + MOVD $SYS_ioctl, R9 + SYSCALL R9 + BVC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + MOVD R3, err+32(FP) + RET +ok: + MOVD R3, r+24(FP) + MOVD R0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R3 + MOVD cmd+8(FP), R4 + MOVD arg+16(FP), R5 + MOVD R0, R6 + MOVD R0, R7 + MOVD R0, R8 + MOVD $SYS_fcntl, R9 + SYSCALL R9 + BVC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + MOVD R3, err+32(FP) + RET +ok: + MOVD R3, r+24(FP) + MOVD R0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R3 + MOVD cmd+8(FP), R4 + MOVD arg+16(FP), R5 + MOVD R0, R6 + MOVD R0, R7 + MOVD R0, R8 + MOVD $SYS_fcntl, R9 + SYSCALL R9 + BVC ok + MOVD $-1, R4 + MOVD R4, r+24(FP) + MOVD R3, err+32(FP) + RET +ok: + MOVD R3, r+24(FP) + MOVD R0, err+32(FP) + RET diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s index 58b36dff0a..9d785d856c 100644 --- a/src/runtime/sys_linux_s390x.s +++ 
b/src/runtime/sys_linux_s390x.s @@ -16,7 +16,8 @@ #define SYS_close 6 #define SYS_getpid 20 #define SYS_kill 37 -#define SYS_brk 45 +#define SYS_brk 45 +#define SYS_ioctl 54 #define SYS_fcntl 55 #define SYS_mmap 90 #define SYS_munmap 91 @@ -40,6 +41,7 @@ #define SYS_epoll_wait 251 #define SYS_clock_gettime 260 #define SYS_epoll_create1 327 +#define SYS_perf_event_open 331 TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 MOVW code+0(FP), R2 @@ -464,3 +466,89 @@ TEXT runtime·socket(SB),$0-20 MOVD $0, 2(R0) // unimplemented, only needed for android; declared in stubs_linux.go MOVW R0, ret+16(FP) RET + +// psu: not tested +TEXT runtime·perfEventOpen(SB),NOSPLIT,$0-72 + MOVD attr+0(FP), R2 + MOVD pid+8(FP), R3 + MOVD cpu+16(FP), R4 + MOVD groupFd+24(FP), R5 + MOVD flags+32(FP), R6 + MOVD dummy+40(FP), R7 + MOVD $SYS_perf_event_open, R1 + SYSCALL + MOVD $0xfffffffffffff001, R8 + CMPUBLT R2, R8, ok + MOVW $-1, r+48(FP) + MOVD $0, r2+56(FP) + NEG R2, R2 + MOVD R2, err+64(FP) + RET +ok: + MOVW R2, r+48(FP) + MOVD R3, r2+56(FP) + MOVD $0, err+64(FP) + RET + +// psu: not tested +TEXT runtime·ioctl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R2 + MOVD req+8(FP), R3 + MOVD arg+16(FP), R4 + MOVD $0, R5 + MOVD $0, R6 + MOVD $0, R7 + MOVD $SYS_ioctl, R1 + SYSCALL + MOVD $0xfffffffffffff001, R8 + CMPUBLT R2, R8, ok + MOVD $-1, r+24(FP) + NEG R2, R2 + MOVD R2, err+32(FP) + RET +ok: + MOVD R2, r+24(FP) + MOVD $0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R2 + MOVD cmd+8(FP), R3 + MOVD arg+16(FP), R4 + MOVD $0, R5 + MOVD $0, R6 + MOVD $0, R7 + MOVD $SYS_fcntl, R1 + SYSCALL + MOVD $0xfffffffffffff001, R8 + CMPUBLT R2, R8, ok + MOVD $-1, r+24(FP) + NEG R2, R2 + MOVD R2, err+32(FP) + RET +ok: + MOVD R2, r+24(FP) + MOVD $0, err+32(FP) + RET + +// psu: not tested +TEXT runtime·fcntl2(SB),NOSPLIT,$0-40 + MOVW fd+0(FP), R2 + MOVD cmd+8(FP), R3 + MOVD arg+16(FP), R4 + MOVD $0, R5 + MOVD $0, R6 + MOVD $0, R7 + MOVD $SYS_fcntl, R1 + SYSCALL + MOVD
$0xfffffffffffff001, R8 + CMPUBLT R2, R8, ok + MOVD $-1, r+24(FP) + NEG R2, R2 + MOVD R2, err+32(FP) + RET +ok: + MOVD R2, r+24(FP) + MOVD $0, err+32(FP) + RET diff --git a/src/testing/internal/testdeps/deps.go b/src/testing/internal/testdeps/deps.go index 14512e9632..546a92fef1 100644 --- a/src/testing/internal/testdeps/deps.go +++ b/src/testing/internal/testdeps/deps.go @@ -12,10 +12,12 @@ package testdeps import ( "bufio" + "fmt" "internal/testlog" "io" "regexp" "runtime/pprof" + "strconv" "strings" "sync" ) @@ -46,6 +48,44 @@ func (TestDeps) StopCPUProfile() { pprof.StopCPUProfile() } +func (TestDeps) StartPMUProfile(w io.Writer, event string, period int64, preciseIP int8, isKernelIncluded bool, isHvIncluded bool) error { + eventConfig := pprof.PMUEventConfig{ + Period: period, + PreciseIP: preciseIP, + IsKernelIncluded: isKernelIncluded, + IsHvIncluded: isHvIncluded, + } + switch event { + case "cycles": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(w, &eventConfig)) + case "instructions": + return pprof.StartPMUProfile(pprof.WithProfilingPMUInstructions(w, &eventConfig)) + case "cacheReferences": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheReferences(w, &eventConfig)) + case "cacheMisses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheMisses(w, &eventConfig)) + case "cacheLLReadAccesses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheLLReadAccesses(w, &eventConfig)) + case "cacheLLReadMisses": + return pprof.StartPMUProfile(pprof.WithProfilingPMUCacheLLReadMisses(w, &eventConfig)) + default: + // Is this a raw event? 
+ if strings.HasPrefix(event, "r") { + if rawHexEvent, err := strconv.ParseInt(event[1:], 16, 64); err == nil { + eventConfig.RawEvent = rawHexEvent + return pprof.StartPMUProfile(pprof.WithProfilingPMURaw(w, &eventConfig)) + } + return fmt.Errorf("Incorrect hex format for raw event") + } else { + return fmt.Errorf("Unknown or not yet implemented event") + } + } +} + +func (TestDeps) StopPMUProfile() { + pprof.StopPMUProfile() +} + func (TestDeps) WriteProfileTo(name string, w io.Writer, debug int) error { return pprof.Lookup(name).WriteTo(w, debug) } diff --git a/src/testing/testing.go b/src/testing/testing.go index 339df13f43..a5e38fd7a6 100644 --- a/src/testing/testing.go +++ b/src/testing/testing.go @@ -285,6 +285,12 @@ func Init() { memProfile = flag.String("test.memprofile", "", "write an allocation profile to `file`") memProfileRate = flag.Int("test.memprofilerate", 0, "set memory allocation profiling `rate` (see runtime.MemProfileRate)") cpuProfile = flag.String("test.cpuprofile", "", "write a cpu profile to `file`") + pmuProfile = flag.String("test.pmuprofile", "", "write a pmu profile to `file`") + pmuEvent = flag.String("test.pmuevent", "cycles", "select a pmu event from the events: cycles, instructions, cacheReferences, cacheMisses, cacheLLReadAccesses, cacheLLReadMisses") + pmuPeriod = flag.Int64("test.pmuperiod", 10000000, "specify the sampling period for a PMU event") + pmuPreciseIP = flag.Int("test.pmupreciseip", 0, "specify the precise IP level (0-3) for a PMU event") + pmuKernelIncluded = flag.Bool("test.pmukernelincl", false, "count the kernel") + pmuHvIncluded = flag.Bool("test.pmuhvincl", false, "count the hypervisor") blockProfile = flag.String("test.blockprofile", "", "write a goroutine blocking profile to `file`") blockProfileRate = flag.Int("test.blockprofilerate", 1, "set blocking profile `rate` (see runtime.SetBlockProfileRate)") mutexProfile = flag.String("test.mutexprofile", "", "write a mutex contention profile to the named file after 
execution") @@ -311,6 +317,12 @@ var ( memProfile *string memProfileRate *int cpuProfile *string + pmuProfile *string + pmuEvent *string + pmuPeriod *int64 + pmuPreciseIP *int + pmuKernelIncluded *bool + pmuHvIncluded *bool blockProfile *string blockProfileRate *int mutexProfile *string @@ -1026,9 +1038,13 @@ var errMain = errors.New("testing: unexpected use of func Main") type matchStringOnly func(pat, str string) (bool, error) -func (f matchStringOnly) MatchString(pat, str string) (bool, error) { return f(pat, str) } -func (f matchStringOnly) StartCPUProfile(w io.Writer) error { return errMain } -func (f matchStringOnly) StopCPUProfile() {} +func (f matchStringOnly) MatchString(pat, str string) (bool, error) { return f(pat, str) } +func (f matchStringOnly) StartCPUProfile(w io.Writer) error { return errMain } +func (f matchStringOnly) StopCPUProfile() {} +func (f matchStringOnly) StartPMUProfile(w io.Writer, event string, period int64, preciseIP int8, isKernelIncluded bool, isHvIncluded bool) error { + return errMain +} +func (f matchStringOnly) StopPMUProfile() {} func (f matchStringOnly) WriteProfileTo(string, io.Writer, int) error { return errMain } func (f matchStringOnly) ImportPath() string { return "" } func (f matchStringOnly) StartTestLog(io.Writer) {} @@ -1066,6 +1082,8 @@ type testDeps interface { MatchString(pat, str string) (bool, error) StartCPUProfile(io.Writer) error StopCPUProfile() + StartPMUProfile(io.Writer, string, int64, int8, bool, bool) error + StopPMUProfile() StartTestLog(io.Writer) StopTestLog() error WriteProfileTo(string, io.Writer, int) error @@ -1236,6 +1254,19 @@ func (m *M) before() { } // Could save f so after can call f.Close; not worth the effort. 
} + if *pmuProfile != "" { + f, err := os.Create(toOutputDir(*pmuProfile)) + if err != nil { + fmt.Fprintf(os.Stderr, "testing: %s\n", err) + return + } + if err := m.deps.StartPMUProfile(f, *pmuEvent, *pmuPeriod, int8(*pmuPreciseIP), *pmuKernelIncluded, *pmuHvIncluded); err != nil { + fmt.Fprintf(os.Stderr, "testing: can't start pmu profile: %s\n", err) + f.Close() + return + } + // Could save f so after can call f.Close; not worth the effort. + } if *traceFile != "" { f, err := os.Create(toOutputDir(*traceFile)) if err != nil { @@ -1302,6 +1333,9 @@ func (m *M) writeProfiles() { if *cpuProfile != "" { m.deps.StopCPUProfile() // flushes profile to disk } + if *pmuProfile != "" { + m.deps.StopPMUProfile() // flushes profile to disk + } if *traceFile != "" { trace.Stop() // flushes trace to disk } diff --git a/test/pmu/http/test1.go b/test/pmu/http/test1.go new file mode 100644 index 0000000000..7edf4d1641 --- /dev/null +++ b/test/pmu/http/test1.go @@ -0,0 +1,27 @@ +// run +// Example of usage: +// 1. go run test1.go +// 2. go tool pprof http://localhost:6060/debug/pprof/profile?seconds=6\&pmu=true\&pmuevent=cycles\&pmuperiod=10000000 + +package main + +import ( + "fmt" + "log" + "net/http" + _ "net/http/pprof" + _ "time" +) + +var sum int + +func main() { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + // time.Sleep(2 * time.Second) + for i := 0; i <= 10000000000; i++ { + sum += i + } + fmt.Println(sum) +} diff --git a/test/pmu/http/test2.go b/test/pmu/http/test2.go new file mode 100644 index 0000000000..ce0b62cd38 --- /dev/null +++ b/test/pmu/http/test2.go @@ -0,0 +1,53 @@ +// run +// Example of usage: +// 1. go run test2.go +// 2. 
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=6\&pmu=true\&pmuevent=instructions\&pmuperiod=10000000 + +package main + +import ( + "fmt" + "log" + "net/http" + _ "net/http/pprof" + "sync" +) + +var wg sync.WaitGroup +var mux sync.Mutex +var sum int + +func f(i int) { + defer wg.Done() + var local int + for j := i; j < 100000000; j++ { + local -= j / 2 + local *= j + mux.Lock() + sum += local + mux.Unlock() + } +} + +func run() error { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + + wg.Add(1000) + defer wg.Wait() + + for i := 0; i < 1000; i++ { + go f(i) + } + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } + + fmt.Println(sum) +} diff --git a/test/pmu/http/test3.go b/test/pmu/http/test3.go new file mode 100644 index 0000000000..65e37bc07e --- /dev/null +++ b/test/pmu/http/test3.go @@ -0,0 +1,51 @@ +// run +// Example of usage: +// 1. go run test3.go +// 2. go tool pprof http://localhost:6060/debug/pprof/profile?seconds=6\&pmu=true\&pmuevent=cacheMisses\&pmuperiod=10000000 + +package main + +import ( + "fmt" + "log" + "net/http" + _ "net/http/pprof" + "sync" + "time" +) + +var wg sync.WaitGroup +var mux sync.Mutex +var sum int + +func f(i int) { + defer wg.Done() + for j := i; j < 100000000; j++ { + sum -= j / 2 + sum *= j + time.Sleep(time.Microsecond) + } +} + +func run() error { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + + wg.Add(1000) + defer wg.Wait() + + for i := 0; i < 1000; i++ { + go f(i) + } + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } + + fmt.Println(sum) +} diff --git a/test/pmu/http/test4.go b/test/pmu/http/test4.go new file mode 100644 index 0000000000..dd4c4950c3 --- /dev/null +++ b/test/pmu/http/test4.go @@ -0,0 +1,25 @@ +// run +// Example of Usage: +// 1. go run test4.go +// 2. 
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=6\&pmu=true\&pmuevent=r53010e\&pmuperiod=1000000 + +package main + +import ( + "fmt" + "log" + "net/http" + _ "net/http/pprof" +) + +var sum int + +func main() { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + for i := 0; i <= 10000000000; i++ { + sum += i + } + fmt.Println(sum) +} diff --git a/test/pmu/leak/test1.go b/test/pmu/leak/test1.go new file mode 100644 index 0000000000..ee9911e7fb --- /dev/null +++ b/test/pmu/leak/test1.go @@ -0,0 +1,39 @@ +// run + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" +) + +var sum int + +func run() error { + for /*i := 0; i < 10; i++*/ { + itimerFile, err := os.Create("itimer_profile") + if err != nil { + return err + } + + if err = pprof.StartCPUProfile(itimerFile); err != nil { + return err + } + for j := 0; j < 100; j++ { + sum += j + } + pprof.StopCPUProfile() + itimerFile.Close() + } + fmt.Println(sum) + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/leak/test2.go b/test/pmu/leak/test2.go new file mode 100644 index 0000000000..a5dbf84e83 --- /dev/null +++ b/test/pmu/leak/test2.go @@ -0,0 +1,44 @@ +// run + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" +) + +var sum int + +func run() error { + for /*i := 0; i < 100000; i++*/ { + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return err + } + + var cycle pprof.PMUEventConfig + cycle.Period = 100000 + cycle.IsKernelIncluded = false + cycle.IsHvIncluded = false + + if err = pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle)); err != nil { + return err + } + for j := 0; j < 10000000; j++ { + sum += j + } + pprof.StopPMUProfile() + cycleFile.Close() + } + fmt.Println(sum) + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/leak/test3.go b/test/pmu/leak/test3.go new file mode 100644 
index 0000000000..bea689905f --- /dev/null +++ b/test/pmu/leak/test3.go @@ -0,0 +1,58 @@ +// run + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" +) + +var sum int + +func run() error { + for /*i := 0; i < 10; i++*/ { + itimerFile, err := os.Create("itimer_profile") + if err != nil { + return err + } + + if err = pprof.StartCPUProfile(itimerFile); err != nil { + return err + } + for j := 0; j < 10000000; j++ { + sum += j + } + pprof.StopCPUProfile() + itimerFile.Close() + + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return err + } + + var cycle pprof.PMUEventConfig + cycle.Period = 100000 + cycle.IsKernelIncluded = false + cycle.IsHvIncluded = false + + if err = pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle)); err != nil { + return err + } + for j := 0; j < 10000000; j++ { + sum += j + } + pprof.StopPMUProfile() + cycleFile.Close() + } + fmt.Println(sum) + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/leak/test4.go b/test/pmu/leak/test4.go new file mode 100644 index 0000000000..cb12c65136 --- /dev/null +++ b/test/pmu/leak/test4.go @@ -0,0 +1,64 @@ +// run + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" +) + +var sum int + +func run() error { + for /*i := 0; i < 100; i++*/ { + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return err + } + instrFile, err := os.Create("instr_profile") + if err != nil { + return err + } + cacheRefFile, err := os.Create("cacheRef_profile") + if err != nil { + return err + } + cacheMissFile, err := os.Create("cacheMiss_profile") + if err != nil { + return err + } + + var cycle pprof.PMUEventConfig + cycle.Period = 1000000 + var instr pprof.PMUEventConfig + instr.Period = 1000000 + var cacheRef pprof.PMUEventConfig + cacheRef.Period = 100 + var cacheMiss pprof.PMUEventConfig + cacheMiss.Period = 100 + + if err := 
pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle), pprof.WithProfilingPMUInstructions(instrFile, &instr), pprof.WithProfilingPMUCacheReferences(cacheRefFile, &cacheRef), pprof.WithProfilingPMUCacheMisses(cacheMissFile, &cacheMiss)); err != nil { + return err + } + for j := 0; j < 10000000; j++ { + sum += j + } + pprof.StopPMUProfile() + + cycleFile.Close() + instrFile.Close() + cacheRefFile.Close() + cacheMissFile.Close() + } + fmt.Println(sum) + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/leak/test5.go b/test/pmu/leak/test5.go new file mode 100644 index 0000000000..5fb8a323d7 --- /dev/null +++ b/test/pmu/leak/test5.go @@ -0,0 +1,72 @@ +// run + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" + "sync" +) + +var wg sync.WaitGroup +var sum int + +func run() { + defer wg.Done() + for i := 0; i < 10000000; i++ { + sum += i + } +} + +func main() { + wg.Add(1000) + + cycleFile, err := os.Create("cycle_profile") + if err != nil { + log.Fatal(err) + return + } + instrFile, err := os.Create("instr_profile") + if err != nil { + log.Fatal(err) + return + } + cacheRefFile, err := os.Create("cacheRef_profile") + if err != nil { + log.Fatal(err) + return + } + cacheMissFile, err := os.Create("cacheMiss_profile") + if err != nil { + log.Fatal(err) + return + } + + var cycle pprof.PMUEventConfig + cycle.Period = 1000000 + var instr pprof.PMUEventConfig + instr.Period = 1000000 + var cacheRef pprof.PMUEventConfig + cacheRef.Period = 100 + var cacheMiss pprof.PMUEventConfig + cacheMiss.Period = 1 + + if err := pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle), pprof.WithProfilingPMUInstructions(instrFile, &instr), pprof.WithProfilingPMUCacheReferences(cacheRefFile, &cacheRef), pprof.WithProfilingPMUCacheMisses(cacheMissFile, &cacheMiss)); err != nil { + log.Fatal(err) + } + + for i := 0; i < 1000; i++ { + go run() + } + + wg.Wait() + fmt.Println(sum) + 
pprof.StopPMUProfile() + + cycleFile.Close() + instrFile.Close() + cacheRefFile.Close() + cacheMissFile.Close() +} diff --git a/test/pmu/leak/test6.go b/test/pmu/leak/test6.go new file mode 100644 index 0000000000..cd15c08e49 --- /dev/null +++ b/test/pmu/leak/test6.go @@ -0,0 +1,68 @@ +// run + +package main + +import ( + "fmt" + "os" + "runtime/pprof" + "time" +) + +var sum int + +func run() { + for { + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return + } + + instrFile, err := os.Create("instr_profile") + if err != nil { + return + } + + cacheRefFile, err := os.Create("cacheRef_profile") + if err != nil { + return + } + + cacheMissFile, err := os.Create("cacheMiss_profile") + if err != nil { + return + } + + var cycle pprof.PMUEventConfig + cycle.Period = 1000000 + var instr pprof.PMUEventConfig + instr.Period = 1000000 + var cacheRef pprof.PMUEventConfig + cacheRef.Period = 100 + var cacheMiss pprof.PMUEventConfig + cacheMiss.Period = 1 + + if err := pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle), pprof.WithProfilingPMUInstructions(instrFile, &instr), pprof.WithProfilingPMUCacheReferences(cacheRefFile, &cacheRef), pprof.WithProfilingPMUCacheMisses(cacheMissFile, &cacheMiss)); err != nil { + return + } + + for i := 0; i < 100000000; i++ { + sum += i + } + pprof.StopPMUProfile() + + cycleFile.Close() + instrFile.Close() + cacheRefFile.Close() + cacheMissFile.Close() + } +} + +func main() { + for i := 0; i < 100; i++ { + go run() + } + + time.Sleep(time.Hour) + fmt.Println(sum) +} diff --git a/test/pmu/test1.go b/test/pmu/test1.go new file mode 100644 index 0000000000..c28c4cbd7d --- /dev/null +++ b/test/pmu/test1.go @@ -0,0 +1,193 @@ +// run +// Example of usage: go run test1.go +// Flamegraph: go tool pprof -relative_percentages -http=":8081" cycle_profile +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" + "sync" +) + +var wg sync.WaitGroup + +func f1() { + defer wg.Done() + + var sum int + for i := 
0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f2() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f3() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f4() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f5() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f6() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f7() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f8() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f9() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f10() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func run() error { + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return err + } + defer cycleFile.Close() + + var cycle pprof.PMUEventConfig + cycle.Period = 10000000 + cycle.PreciseIP = 2 + + if err := pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle)); err != nil { + return err + } + defer 
pprof.StopPMUProfile() + + wg.Add(10) + defer wg.Wait() + + go f1() + go f2() + go f3() + go f4() + go f5() + go f6() + go f7() + go f8() + go f9() + go f10() + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/test1.out b/test/pmu/test1.out new file mode 100644 index 0000000000..9546171295 --- /dev/null +++ b/test/pmu/test1.out @@ -0,0 +1,10 @@ +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 diff --git a/test/pmu/test2.go b/test/pmu/test2.go new file mode 100644 index 0000000000..3d138bbeff --- /dev/null +++ b/test/pmu/test2.go @@ -0,0 +1,223 @@ +// run +// Example of usage: go run test2.go + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" + "sync" + _ "time" +) + +var wg sync.WaitGroup + +var racy int32 + +func f1() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f2() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f3() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f4() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f5() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f6() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f7() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + 
sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f8() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f9() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f10() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func run() error { + cycleFile, err := os.Create("cycle_profile") + if err != nil { + return err + } + defer cycleFile.Close() + + var cycle pprof.PMUEventConfig + cycle.Period = 100000000 + + instrFile, err := os.Create("instr_profile") + if err != nil { + return err + } + defer instrFile.Close() + + var instr pprof.PMUEventConfig + instr.Period = 100000000 + + cacheMissFile, err := os.Create("cacheMiss_profile") + if err != nil { + return err + } + defer cacheMissFile.Close() + + var cacheMiss pprof.PMUEventConfig + cacheMiss.Period = 10 + + cacheRefFile, err := os.Create("cacheRef_profile") + if err != nil { + return err + } + defer cacheRefFile.Close() + + var cacheRef pprof.PMUEventConfig + cacheRef.Period = 1000 + + if err := pprof.StartPMUProfile(pprof.WithProfilingPMUCycles(cycleFile, &cycle), pprof.WithProfilingPMUInstructions(instrFile, &instr), pprof.WithProfilingPMUCacheReferences(cacheRefFile, &cacheRef), pprof.WithProfilingPMUCacheMisses(cacheMissFile, &cacheMiss)); err != nil { + return err + } + + defer pprof.StopPMUProfile() + + wg.Add(10) + defer wg.Wait() + + go f1() + go f2() + go f3() + go f4() + go f5() + go f6() + go f7() + go f8() + go f9() + go f10() + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/test2.out b/test/pmu/test2.out new file mode 100644 index 0000000000..9546171295 --- /dev/null +++ 
b/test/pmu/test2.out @@ -0,0 +1,10 @@ +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 diff --git a/test/pmu/test3.go b/test/pmu/test3.go new file mode 100644 index 0000000000..2558eb05bc --- /dev/null +++ b/test/pmu/test3.go @@ -0,0 +1,189 @@ +// run +// Example of usage: go run test3.go + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" + "sync" +) + +var wg sync.WaitGroup + +func f1() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f2() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f3() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f4() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f5() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f6() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f7() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f8() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f9() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + 
fmt.Println(sum) +} + +func f10() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func run() error { + itimerFile, err := os.Create("itimer_profile") + if err != nil { + return err + } + defer itimerFile.Close() + + if err := pprof.StartCPUProfile(itimerFile); err != nil { + return err + } + defer pprof.StopCPUProfile() + + wg.Add(10) + defer wg.Wait() + + go f1() + go f2() + go f3() + go f4() + go f5() + go f6() + go f7() + go f8() + go f9() + go f10() + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/test3.out b/test/pmu/test3.out new file mode 100644 index 0000000000..9546171295 --- /dev/null +++ b/test/pmu/test3.out @@ -0,0 +1,10 @@ +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 diff --git a/test/pmu/test4.go b/test/pmu/test4.go new file mode 100644 index 0000000000..b2d33b0eab --- /dev/null +++ b/test/pmu/test4.go @@ -0,0 +1,193 @@ +// run +// Example of usage: go run test4.go + +package main + +import ( + "fmt" + "log" + "os" + "runtime/pprof" + "sync" +) + +var wg sync.WaitGroup + +func f1() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f2() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f3() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f4() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f5() { + defer wg.Done() + + var sum int + for i := 
0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f6() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f7() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f8() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f9() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func f10() { + defer wg.Done() + + var sum int + for i := 0; i < 500000000; i++ { + sum -= i / 2 + sum *= i + sum /= i/3 + 1 + sum -= i / 4 + } + + fmt.Println(sum) +} + +func run() error { + rawFile, err := os.Create("raw_profile") + if err != nil { + return err + } + defer rawFile.Close() + + var raw pprof.PMUEventConfig + raw.RawEvent = 0x53010e /* UOPS_ISSUED */ + raw.Period = 1000000 + + if err := pprof.StartPMUProfile(pprof.WithProfilingPMURaw(rawFile, &raw)); err != nil { + return err + } + defer pprof.StopPMUProfile() + + wg.Add(10) + defer wg.Wait() + + go f1() + go f2() + go f3() + go f4() + go f5() + go f6() + go f7() + go f8() + go f9() + go f10() + + return nil +} + +func main() { + if err := run(); err != nil { + log.Fatal(err) + } +} diff --git a/test/pmu/test4.out b/test/pmu/test4.out new file mode 100644 index 0000000000..9546171295 --- /dev/null +++ b/test/pmu/test4.out @@ -0,0 +1,10 @@ +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 +44438869911 diff --git a/test/run.go b/test/run.go index 28ed865c50..f6c42fe1c0 100644 --- a/test/run.go +++ b/test/run.go @@ -50,6 +50,7 @@ var ( // dirs are the 
directories to look for *.go files in. // TODO(bradfitz): just use all directories? + // dirs = []string{".", "ken", "chan", "interface", "syntax", "dwarf", "fixedbugs", "codegen", "runtime", "pmu"} dirs = []string{".", "ken", "chan", "interface", "syntax", "dwarf", "fixedbugs", "codegen", "runtime"} // ratec controls the max number of tests running at a time.