0
0
Fork 0
mirror of https://github.com/crazy-max/diun.git synced 2025-01-08 17:53:09 +00:00
crazy-max_diun/vendor/github.com/felixge/fgprof/fgprof.go
2024-12-14 22:30:21 +01:00

307 lines
8.1 KiB
Go

// fgprof is a sampling Go profiler that allows you to analyze On-CPU as well
// as [Off-CPU](http://www.brendangregg.com/offcpuanalysis.html) (e.g. I/O)
// time together.
package fgprof
import (
"fmt"
"io"
"runtime"
"sort"
"strings"
"time"
"github.com/google/pprof/profile"
)
// Format decides how the output is rendered to the user.
type Format string
const (
// FormatFolded is used by Brendan Gregg's FlameGraph utility, see
// https://github.com/brendangregg/FlameGraph#2-fold-stacks.
FormatFolded Format = "folded"
// FormatPprof is used by Google's pprof utility, see
// https://github.com/google/pprof/blob/master/proto/README.md.
FormatPprof Format = "pprof"
)
// Start begins profiling the goroutines of the program and returns a function
// that needs to be invoked by the caller to stop the profiling and write the
// results to w using the given format.
func Start(w io.Writer, format Format) func() error {
startTime := time.Now()
// Go's CPU profiler uses 100hz, but 99hz might be less likely to result in
// accidental synchronization with the program we're profiling.
const hz = 99
ticker := time.NewTicker(time.Second / hz)
stopCh := make(chan struct{})
prof := &profiler{}
profile := newWallclockProfile()
go func() {
defer ticker.Stop()
for {
select {
case <-ticker.C:
stacks := prof.GoroutineProfile()
profile.Add(stacks)
case <-stopCh:
return
}
}
}()
return func() error {
stopCh <- struct{}{}
endTime := time.Now()
profile.Ignore(prof.SelfFrames()...)
return profile.Export(w, format, hz, startTime, endTime)
}
}
// profiler provides a convenient and performant way to access
// runtime.GoroutineProfile().
type profiler struct {
stacks []runtime.StackRecord
selfFrame *runtime.Frame
}
// GoroutineProfile returns the stacks of all goroutines currently managed by
// the scheduler. This includes both goroutines that are currently running
// (On-CPU), as well as waiting (Off-CPU).
func (p *profiler) GoroutineProfile() []runtime.StackRecord {
if p.selfFrame == nil {
// Determine the runtime.Frame of this func so we can hide it from our
// profiling output.
rpc := make([]uintptr, 1)
n := runtime.Callers(1, rpc)
if n < 1 {
panic("could not determine selfFrame")
}
selfFrame, _ := runtime.CallersFrames(rpc).Next()
p.selfFrame = &selfFrame
}
// We don't know how many goroutines exist, so we have to grow p.stacks
// dynamically. We overshoot by 10% since it's possible that more goroutines
// are launched in between two calls to GoroutineProfile. Once p.stacks
// reaches the maximum number of goroutines used by the program, it will get
// reused indefinitely, eliminating GoroutineProfile calls and allocations.
//
// TODO(fg) There might be workloads where it would be nice to shrink
// p.stacks dynamically as well, but let's not over-engineer this until we
// understand those cases better.
for {
n, ok := runtime.GoroutineProfile(p.stacks)
if !ok {
p.stacks = make([]runtime.StackRecord, int(float64(n)*1.1))
} else {
return p.stacks[0:n]
}
}
}
// SelfFrames returns frames that belong to the profiler so that we can ignore
// them when exporting the final profile.
func (p *profiler) SelfFrames() []*runtime.Frame {
if p.selfFrame != nil {
return []*runtime.Frame{p.selfFrame}
}
return nil
}
func newWallclockProfile() *wallclockProfile {
return &wallclockProfile{stacks: map[[32]uintptr]*wallclockStack{}}
}
// wallclockProfile holds a wallclock profile that can be exported in different
// formats.
type wallclockProfile struct {
stacks map[[32]uintptr]*wallclockStack
ignore []*runtime.Frame
}
// wallclockStack holds the symbolized frames of a stack trace and the number
// of times it has been seen.
type wallclockStack struct {
frames []*runtime.Frame
count int
}
// Ignore sets a list of frames that should be ignored when exporting the
// profile.
func (p *wallclockProfile) Ignore(frames ...*runtime.Frame) {
p.ignore = frames
}
// Add adds the given stack traces to the profile.
func (p *wallclockProfile) Add(stackRecords []runtime.StackRecord) {
for _, stackRecord := range stackRecords {
if _, ok := p.stacks[stackRecord.Stack0]; !ok {
ws := &wallclockStack{}
// symbolize pcs into frames
frames := runtime.CallersFrames(stackRecord.Stack())
for {
frame, more := frames.Next()
ws.frames = append(ws.frames, &frame)
if !more {
break
}
}
p.stacks[stackRecord.Stack0] = ws
}
p.stacks[stackRecord.Stack0].count++
}
}
func (p *wallclockProfile) Export(w io.Writer, f Format, hz int, startTime, endTime time.Time) error {
switch f {
case FormatFolded:
return p.exportFolded(w)
case FormatPprof:
return p.exportPprof(hz, startTime, endTime).Write(w)
default:
return fmt.Errorf("unknown format: %q", f)
}
}
// exportStacks returns the stacks in this profile except those that have been
// set to Ignore().
func (p *wallclockProfile) exportStacks() []*wallclockStack {
stacks := make([]*wallclockStack, 0, len(p.stacks))
nextStack:
for _, ws := range p.stacks {
for _, f := range ws.frames {
for _, igf := range p.ignore {
if f.Entry == igf.Entry {
continue nextStack
}
}
}
stacks = append(stacks, ws)
}
return stacks
}
func (p *wallclockProfile) exportFolded(w io.Writer) error {
var lines []string
stacks := p.exportStacks()
for _, ws := range stacks {
var foldedStack []string
for _, f := range ws.frames {
foldedStack = append(foldedStack, f.Function)
}
line := fmt.Sprintf("%s %d", strings.Join(foldedStack, ";"), ws.count)
lines = append(lines, line)
}
sort.Strings(lines)
_, err := io.WriteString(w, strings.Join(lines, "\n")+"\n")
return err
}
func (p *wallclockProfile) exportPprof(hz int, startTime, endTime time.Time) *profile.Profile {
prof := &profile.Profile{}
m := &profile.Mapping{ID: 1, HasFunctions: true}
prof.Period = int64(1e9 / hz) // Number of nanoseconds between samples.
prof.TimeNanos = startTime.UnixNano()
prof.DurationNanos = int64(endTime.Sub(startTime))
prof.Mapping = []*profile.Mapping{m}
prof.SampleType = []*profile.ValueType{
{
Type: "samples",
Unit: "count",
},
{
Type: "time",
Unit: "nanoseconds",
},
}
prof.PeriodType = &profile.ValueType{
Type: "wallclock",
Unit: "nanoseconds",
}
type functionKey struct {
Name string
Filename string
}
funcIdx := map[functionKey]*profile.Function{}
type locationKey struct {
Function functionKey
Line int
}
locationIdx := map[locationKey]*profile.Location{}
for _, ws := range p.exportStacks() {
sample := &profile.Sample{
Value: []int64{
int64(ws.count),
int64(1000 * 1000 * 1000 / hz * ws.count),
},
}
for _, frame := range ws.frames {
fnKey := functionKey{Name: frame.Function, Filename: frame.File}
function, ok := funcIdx[fnKey]
if !ok {
function = &profile.Function{
ID: uint64(len(prof.Function)) + 1,
Name: frame.Function,
SystemName: frame.Function,
Filename: frame.File,
}
funcIdx[fnKey] = function
prof.Function = append(prof.Function, function)
}
locKey := locationKey{Function: fnKey, Line: frame.Line}
location, ok := locationIdx[locKey]
if !ok {
location = &profile.Location{
ID: uint64(len(prof.Location)) + 1,
Mapping: m,
Line: []profile.Line{{
Function: function,
Line: int64(frame.Line),
}},
}
locationIdx[locKey] = location
prof.Location = append(prof.Location, location)
}
sample.Location = append(sample.Location, location)
}
prof.Sample = append(prof.Sample, sample)
}
return prof
}
type symbolizedStacks map[[32]uintptr][]frameCount
func (w wallclockProfile) Symbolize(exclude *runtime.Frame) symbolizedStacks {
m := make(symbolizedStacks)
outer:
for stack0, ws := range w.stacks {
frames := runtime.CallersFrames((&runtime.StackRecord{Stack0: stack0}).Stack())
for {
frame, more := frames.Next()
if frame.Entry == exclude.Entry {
continue outer
}
m[stack0] = append(m[stack0], frameCount{Frame: &frame, Count: ws.count})
if !more {
break
}
}
}
return m
}
type frameCount struct {
*runtime.Frame
Count int
}