Commit 1ac09ef2 by Ian Lance Taylor

libgo: reduce overhead for memory/block/mutex profiling

    
    Revise the gccgo version of memory/block/mutex profiling to reduce
    runtime overhead. The main change is to collect raw stack traces while
    profiling is active, then post-process the stacks just before the
    final product is needed. Memory profiling
    (at a very low sampling rate) is enabled by default, and the overhead
    of the symbolization / DWARF-reading from backtrace_full was slowing
    things down relative to the main Go runtime.
    
    Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/171497

From-SVN: r271172
parent ce9f305e
3f015e128bf6d1d9279f3d43e26f60f0927019cb 6112f9b8fa9d57d2db8a709cc8b44a94d778d08a
The first line of this file holds the git revision number of the last The first line of this file holds the git revision number of the last
merge done from the gofrontend repository. merge done from the gofrontend repository.
...@@ -437,17 +437,15 @@ func dumpmemstats() { ...@@ -437,17 +437,15 @@ func dumpmemstats() {
dumpint(uint64(memstats.numgc)) dumpint(uint64(memstats.numgc))
} }
func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *location, size, allocs, frees uintptr) { func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
stk := (*[100000]location)(unsafe.Pointer(pstk)) stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
dumpint(tagMemProf) dumpint(tagMemProf)
dumpint(uint64(uintptr(unsafe.Pointer(b)))) dumpint(uint64(uintptr(unsafe.Pointer(b))))
dumpint(uint64(size)) dumpint(uint64(size))
dumpint(uint64(nstk)) dumpint(uint64(nstk))
for i := uintptr(0); i < nstk; i++ { for i := uintptr(0); i < nstk; i++ {
pc := stk[i].pc pc := stk[i]
fn := stk[i].function fn, file, line, _ := funcfileline(pc, -1)
file := stk[i].filename
line := stk[i].lineno
if fn == "" { if fn == "" {
var buf [64]byte var buf [64]byte
n := len(buf) n := len(buf)
......
...@@ -1085,7 +1085,7 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { ...@@ -1085,7 +1085,7 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
span != nil && span.state != mSpanManual && span != nil && span.state != mSpanManual &&
(obj < span.base() || obj >= span.limit || span.state != mSpanInUse) { (obj < span.base() || obj >= span.limit || span.state != mSpanInUse) {
print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n") print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n")
name, file, line := funcfileline(pc, -1) name, file, line, _ := funcfileline(pc, -1)
print(name, "\n", file, ":", line, "\n") print(name, "\n", file, ":", line, "\n")
//gcDumpObject("object", b, i) //gcDumpObject("object", b, i)
throw("found bad pointer in Go stack (incorrect use of unsafe or cgo?)") throw("found bad pointer in Go stack (incorrect use of unsafe or cgo?)")
......
...@@ -53,7 +53,7 @@ var indexError = error(errorString("index out of range")) ...@@ -53,7 +53,7 @@ var indexError = error(errorString("index out of range"))
// entire runtime stack for easier debugging. // entire runtime stack for easier debugging.
func panicindex() { func panicindex() {
name, _, _ := funcfileline(getcallerpc()-1, -1) name, _, _, _ := funcfileline(getcallerpc()-1, -1)
if hasPrefix(name, "runtime.") { if hasPrefix(name, "runtime.") {
throw(string(indexError.(errorString))) throw(string(indexError.(errorString)))
} }
...@@ -64,7 +64,7 @@ func panicindex() { ...@@ -64,7 +64,7 @@ func panicindex() {
var sliceError = error(errorString("slice bounds out of range")) var sliceError = error(errorString("slice bounds out of range"))
func panicslice() { func panicslice() {
name, _, _ := funcfileline(getcallerpc()-1, -1) name, _, _, _ := funcfileline(getcallerpc()-1, -1)
if hasPrefix(name, "runtime.") { if hasPrefix(name, "runtime.") {
throw(string(sliceError.(errorString))) throw(string(sliceError.(errorString)))
} }
......
...@@ -360,6 +360,10 @@ func hasPrefix(s, prefix string) bool { ...@@ -360,6 +360,10 @@ func hasPrefix(s, prefix string) bool {
return len(s) >= len(prefix) && s[:len(prefix)] == prefix return len(s) >= len(prefix) && s[:len(prefix)] == prefix
} }
// hasSuffix reports whether s ends with suffix. An empty suffix always
// matches; a suffix longer than s never does.
func hasSuffix(s, suffix string) bool {
	start := len(s) - len(suffix)
	if start < 0 {
		return false
	}
	return s[start:] == suffix
}
const ( const (
maxUint = ^uint(0) maxUint = ^uint(0)
maxInt = int(maxUint >> 1) maxInt = int(maxUint >> 1)
......
...@@ -79,7 +79,7 @@ func (ci *Frames) Next() (frame Frame, more bool) { ...@@ -79,7 +79,7 @@ func (ci *Frames) Next() (frame Frame, more bool) {
// Subtract 1 from PC to undo the 1 we added in callback in // Subtract 1 from PC to undo the 1 we added in callback in
// go-callers.c. // go-callers.c.
function, file, line := funcfileline(pc-1, int32(i)) function, file, line, _ := funcfileline(pc-1, int32(i))
if function == "" && file == "" { if function == "" && file == "" {
return Frame{}, more return Frame{}, more
} }
...@@ -158,7 +158,7 @@ const ( ...@@ -158,7 +158,7 @@ const (
// the *Func describing the innermost function, but with an entry // the *Func describing the innermost function, but with an entry
// of the outermost function. // of the outermost function.
func FuncForPC(pc uintptr) *Func { func FuncForPC(pc uintptr) *Func {
name, _, _ := funcfileline(pc, -1) name, _, _, _ := funcfileline(pc, -1)
if name == "" { if name == "" {
return nil return nil
} }
...@@ -187,7 +187,7 @@ func (f *Func) Entry() uintptr { ...@@ -187,7 +187,7 @@ func (f *Func) Entry() uintptr {
// The result will not be accurate if pc is not a program // The result will not be accurate if pc is not a program
// counter within f. // counter within f.
func (f *Func) FileLine(pc uintptr) (file string, line int) { func (f *Func) FileLine(pc uintptr) (file string, line int) {
_, file, line = funcfileline(pc, -1) _, file, line, _ = funcfileline(pc, -1)
return file, line return file, line
} }
...@@ -261,5 +261,5 @@ func demangleSymbol(s string) string { ...@@ -261,5 +261,5 @@ func demangleSymbol(s string) string {
} }
// implemented in go-caller.c // implemented in go-caller.c
func funcfileline(uintptr, int32) (string, string, int) func funcfileline(uintptr, int32) (string, string, int, int)
func funcentry(uintptr) uintptr func funcentry(uintptr) uintptr
...@@ -20,7 +20,7 @@ func printcreatedby(gp *g) { ...@@ -20,7 +20,7 @@ func printcreatedby(gp *g) {
if entry != 0 && tracepc > entry { if entry != 0 && tracepc > entry {
tracepc -= sys.PCQuantum tracepc -= sys.PCQuantum
} }
function, file, line := funcfileline(tracepc, -1) function, file, line, _ := funcfileline(tracepc, -1)
if function != "" && showframe(function, gp, false) && gp.goid != 1 { if function != "" && showframe(function, gp, false) && gp.goid != 1 {
printcreatedby1(function, file, line, entry, pc) printcreatedby1(function, file, line, entry, pc)
} }
...@@ -61,6 +61,16 @@ func callers(skip int, locbuf []location) int { ...@@ -61,6 +61,16 @@ func callers(skip int, locbuf []location) int {
return int(n) return int(n)
} }
// c_callersRaw is the Go-side declaration of the C routine
// runtime_callersRaw (bound through the extern directive below). It
// takes a frame skip count, a pointer to the first slot of a PC buffer,
// and the capacity of that buffer, and returns an int32 count.
//go:noescape
//extern runtime_callersRaw
func c_callersRaw(skip int32, pcs *uintptr, max int32) int32
// callersRaw returns a raw (PCs only) stack trace of the current goroutine.
// It stores at most len(pcbuf) program counters into pcbuf and returns the
// number of entries written. No function/file/line lookup is done here.
// An empty pcbuf yields 0.
func callersRaw(skip int, pcbuf []uintptr) int {
	// &pcbuf[0] on an empty slice would fault with an index error before
	// the C routine is ever reached; there is nothing to collect anyway.
	if len(pcbuf) == 0 {
		return 0
	}
	// The extra 1 presumably hides this wrapper's own frame from the
	// trace (NOTE(review): confirm against the C side's skip handling).
	n := c_callersRaw(int32(skip)+1, &pcbuf[0], int32(len(pcbuf)))
	return int(n)
}
// traceback prints a traceback of the current goroutine. // traceback prints a traceback of the current goroutine.
// This differs from the gc version, which is given pc, sp, lr and g and // This differs from the gc version, which is given pc, sp, lr and g and
// can print a traceback of any goroutine. // can print a traceback of any goroutine.
...@@ -83,7 +93,7 @@ func traceback(skip int32) { ...@@ -83,7 +93,7 @@ func traceback(skip int32) {
func printAncestorTraceback(ancestor ancestorInfo) { func printAncestorTraceback(ancestor ancestorInfo) {
print("[originating from goroutine ", ancestor.goid, "]:\n") print("[originating from goroutine ", ancestor.goid, "]:\n")
for fidx, pc := range ancestor.pcs { for fidx, pc := range ancestor.pcs {
function, file, line := funcfileline(pc, -1) function, file, line, _ := funcfileline(pc, -1)
if showfuncinfo(function, fidx == 0) { if showfuncinfo(function, fidx == 0) {
printAncestorTracebackFuncInfo(function, file, line, pc) printAncestorTracebackFuncInfo(function, file, line, pc)
} }
...@@ -92,7 +102,7 @@ func printAncestorTraceback(ancestor ancestorInfo) { ...@@ -92,7 +102,7 @@ func printAncestorTraceback(ancestor ancestorInfo) {
print("...additional frames elided...\n") print("...additional frames elided...\n")
} }
// Show what created goroutine, except main goroutine (goid 1). // Show what created goroutine, except main goroutine (goid 1).
function, file, line := funcfileline(ancestor.gopc, -1) function, file, line, _ := funcfileline(ancestor.gopc, -1)
if function != "" && showfuncinfo(function, false) && ancestor.goid != 1 { if function != "" && showfuncinfo(function, false) && ancestor.goid != 1 {
printcreatedby1(function, file, line, funcentry(ancestor.gopc), ancestor.gopc) printcreatedby1(function, file, line, funcentry(ancestor.gopc), ancestor.gopc)
} }
......
...@@ -26,11 +26,13 @@ struct caller ...@@ -26,11 +26,13 @@ struct caller
String file; String file;
intgo line; intgo line;
intgo index; intgo index;
intgo frames;
}; };
/* Collect file/line information for a PC value. If this is called /* Collect file/line information for a PC value. If this is called
more than once, due to inlined functions, we use the last call, as more than once, due to inlined functions, we record the number of
that is usually the most useful one. */ inlined frames but return file/func/line for the last call, as
that is usually the most useful one. */
static int static int
callback (void *data, uintptr_t pc __attribute__ ((unused)), callback (void *data, uintptr_t pc __attribute__ ((unused)),
...@@ -38,6 +40,8 @@ callback (void *data, uintptr_t pc __attribute__ ((unused)), ...@@ -38,6 +40,8 @@ callback (void *data, uintptr_t pc __attribute__ ((unused)),
{ {
struct caller *c = (struct caller *) data; struct caller *c = (struct caller *) data;
c->frames++;
/* The libbacktrace library says that these strings might disappear, /* The libbacktrace library says that these strings might disappear,
but with the current implementation they won't. We can't easily but with the current implementation they won't. We can't easily
allocate memory here, so for now assume that we can save a allocate memory here, so for now assume that we can save a
...@@ -125,18 +129,19 @@ __go_get_backtrace_state () ...@@ -125,18 +129,19 @@ __go_get_backtrace_state ()
return back_state; return back_state;
} }
/* Return function/file/line information for PC. The index parameter /* Return function/file/line/nframes information for PC. The index parameter
is the entry on the stack of inlined functions; -1 means the last is the entry on the stack of inlined functions; -1 means the last
one. */ one, with *nframes set to the count of inlined frames for this PC. */
static _Bool static _Bool
__go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line) __go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line, intgo *nframes)
{ {
struct caller c; struct caller c;
struct backtrace_state *state; struct backtrace_state *state;
runtime_memclr (&c, sizeof c); runtime_memclr (&c, sizeof c);
c.index = index; c.index = index;
c.frames = 0;
runtime_xadd (&__go_runtime_in_callers, 1); runtime_xadd (&__go_runtime_in_callers, 1);
state = __go_get_backtrace_state (); state = __go_get_backtrace_state ();
runtime_xadd (&__go_runtime_in_callers, -1); runtime_xadd (&__go_runtime_in_callers, -1);
...@@ -144,6 +149,7 @@ __go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line) ...@@ -144,6 +149,7 @@ __go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line)
*fn = c.fn; *fn = c.fn;
*file = c.file; *file = c.file;
*line = c.line; *line = c.line;
*nframes = c.frames;
// If backtrace_pcinfo didn't get the function name from the debug // If backtrace_pcinfo didn't get the function name from the debug
// info, try to get it from the symbol table. // info, try to get it from the symbol table.
...@@ -222,7 +228,7 @@ runtime_funcfileline (uintptr targetpc, int32 index) ...@@ -222,7 +228,7 @@ runtime_funcfileline (uintptr targetpc, int32 index)
struct funcfileline_return ret; struct funcfileline_return ret;
if (!__go_file_line (targetpc, index, &ret.retfn, &ret.retfile, if (!__go_file_line (targetpc, index, &ret.retfn, &ret.retfile,
&ret.retline)) &ret.retline, &ret.retframes))
runtime_memclr (&ret, sizeof ret); runtime_memclr (&ret, sizeof ret);
return ret; return ret;
} }
......
...@@ -63,7 +63,9 @@ callback (void *data, uintptr_t pc, const char *filename, int lineno, ...@@ -63,7 +63,9 @@ callback (void *data, uintptr_t pc, const char *filename, int lineno,
/* Skip thunks and recover functions. There is no equivalent to /* Skip thunks and recover functions. There is no equivalent to
these functions in the gc toolchain, so returning them here means these functions in the gc toolchain, so returning them here means
significantly different results for runtime.Caller(N). */ significantly different results for runtime.Caller(N). See also
similar code in runtime/mprof.go that strips out such functions
for block/mutex/memory profiles. */
if (function != NULL && !arg->keep_thunks) if (function != NULL && !arg->keep_thunks)
{ {
const char *p; const char *p;
...@@ -262,3 +264,62 @@ Callers (intgo skip, struct __go_open_array pc) ...@@ -262,3 +264,62 @@ Callers (intgo skip, struct __go_open_array pc)
return ret; return ret;
} }
/* State shared between runtime_callersRaw and callback_raw while
   walking the stack with backtrace_simple.  */
struct callersRaw_data
{
/* Destination buffer for the collected PC values.  */
uintptr* pcbuf;
/* Number of frames still to be dropped before collecting starts.  */
int skip;
/* Next free slot in pcbuf; also the count of PCs stored so far.  */
int index;
/* Capacity of pcbuf, in entries.  */
int max;
};
// Callback function for backtrace_simple.  Just collect pc's.
// DATA points to the struct callersRaw_data describing the destination
// buffer and the number of frames still to skip.
// Return zero to continue, non-zero to stop.

static int callback_raw (void *data, uintptr_t pc)
{
  struct callersRaw_data *arg = (struct callersRaw_data *) data;

  if (arg->skip > 0)
    {
      --arg->skip;
      return 0;
    }

  /* Never store past the end of the buffer.  This only triggers when
     the buffer has no room at all (max <= 0); otherwise the walk is
     already stopped by the return value below once the buffer fills.  */
  if (arg->index >= arg->max)
    return 1;

  /* On the call to backtrace_simple the pc value was most likely
     decremented if there was a normal call, since the pc referred to
     the instruction where the call returned and not the call itself.
     This was done so that the line number referred to the call
     instruction.  To make sure the actual pc from the call stack is
     used, it is incremented here.

     In the case of a signal, the pc was not decremented by
     backtrace_simple but still incremented here.  That doesn't really
     hurt anything since the line number is right and the pc refers to
     the same instruction.  */

  arg->pcbuf[arg->index] = pc + 1;
  arg->index++;
  return arg->index >= arg->max;
}
/* runtime_callersRaw is the raw-PC counterpart of runtime_callers():
   instead of file/func/line locations it stores bare PC values.  At
   most M entries are written to PCBUF, and the number actually stored
   is returned.  */

int32
runtime_callersRaw (int32 skip, uintptr *pcbuf, int32 m)
{
  struct backtrace_state *state;
  /* One frame more than requested is skipped (presumably this
     function's own frame).  */
  struct callersRaw_data data = {
    .pcbuf = pcbuf,
    .skip = skip + 1,
    .index = 0,
    .max = m,
  };

  /* __go_runtime_in_callers is raised for the duration of the walk
     and lowered again afterwards.  */
  runtime_xadd (&__go_runtime_in_callers, 1);
  state = __go_get_backtrace_state ();
  backtrace_simple (state, 0, callback_raw, error_callback, &data);
  runtime_xadd (&__go_runtime_in_callers, -1);

  return data.index;
}
...@@ -485,6 +485,7 @@ struct funcfileline_return ...@@ -485,6 +485,7 @@ struct funcfileline_return
String retfn; String retfn;
String retfile; String retfile;
intgo retline; intgo retline;
intgo retframes;
}; };
struct funcfileline_return struct funcfileline_return
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment