php小编新一为您介绍在 Golang + CGO 程序中使用 POSIX ucontext 时遇到的一个问题:在 ucontext 绑定的函数中(故意)触发空指针访问后,程序会因 ucontext 所使用的堆栈位置不同,分别以 SIGSEGV 或 SIGTRAP 崩溃。这里的崩溃是为了测试而故意触发的,目的是确认核心逻辑中的错误能否被正确捕获。本文将给出完整的复现代码、崩溃输出和 GDB 调试过程,并说明问题的最终结论。无论您是初学者还是有一定经验的开发者,相信这篇文章都会对您有所帮助。
我目前正在编写 Golang + CGO 程序,并将在 CGO 中使用 POSIX ucontext。由于我所有的核心逻辑都将运行在 ucontext 绑定的函数(即传给 makecontext 的入口函数)中,所以我需要能够捕获这些代码触发的所有错误。我通过访问空指针来进行测试,结果得到了完全不同的行为,而这些行为都取决于 ucontext 所使用的堆栈位置。以下是带有简化示例的更多详细信息。
如果我在线程的堆栈上分配 ucontext 的堆栈,它将触发 SIGSEGV。但如果我在堆上分配它,它会首先触发 SIGSEGV,随后在进入 runtime.sigpanic 时调用 runtime.morestack_noctxt,并在那里触发 SIGTRAP。我该如何解决这个问题,或者说如何才能得到 SIGSEGV?为什么这里需要 morestack?
以下均为详细信息,任何建议或意见将不胜感激。谢谢!
崩溃(使用 malloc 堆栈):
<code>fatal: morestack on g0 SIGTRAP: trace trap PC=0x45f342 m=0 sigcode=128 signal arrived during cgo execution goroutine 1 [syscall]: runtime.cgocall(0x464870, 0xc000067f60) /usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c main._Cfunc_core_logic() _cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5 main.coreLogic() /container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737 main.main() /container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777 runtime.main() /usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1 goroutine 2 [force gc (idle)]: runtime.gopark(0x47a860, 0x6cb4d0, 0x11, 0x14, 0x1) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?) /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a runtime.forcegchelper() /usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1 created by runtime.init.6 /usr/local/go/src/runtime/proc.go:293 +0x25 goroutine 3 [GC sweep wait]: runtime.gopark(0x47a860, 0x6cb640, 0xc, 0x14, 0x1) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?) /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a runtime.bgsweep(0x0?) 
/usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998 runtime.gcenable.func1() /usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1 created by runtime.gcenable /usr/local/go/src/runtime/mgc.go:178 +0x6b goroutine 4 [GC scavenge wait]: runtime.gopark(0x47a860, 0x6cb6c0, 0xd, 0x14, 0x2) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd runtime.goparkunlock(0x47ca80?, 0x1?, 0x0?, 0x0?) /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a runtime.(*scavengerState).park(0x6cb6c0) /usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b runtime.bgscavenge(0x0?) /usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25 runtime.gcenable.func2() /usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1 created by runtime.gcenable /usr/local/go/src/runtime/mgc.go:179 +0xaa rax 0x17 rbx 0x476413 rcx 0x460c95 rdx 0x17 rdi 0x2 rsi 0x476413 rbp 0x7f18906b3ff0 rsp 0x7f18906b3fd8 r8 0xffffffff r9 0x0 r10 0x8 r11 0x246 r12 0xc000067c70 r13 0x0 r14 0x6cb760 r15 0x0 rip 0x45f342 rflags 0x206 cs 0x33 fs 0x0 gs 0x0 </code>
崩溃(线程堆栈):
<code>fatal error: unexpected signal during runtime execution [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0] runtime stack: runtime.throw({0x479118?, 0xffffffffffffffff?}) /usr/local/go/src/runtime/panic.go:1047 +0x5d fp=0x7fff293551f0 sp=0x7fff293551c0 pc=0x43417d runtime.sigpanic() /usr/local/go/src/runtime/signal_unix.go:825 +0x285 fp=0x7fff29355220 sp=0x7fff293551f0 pc=0x4495a5 goroutine 1 [syscall]: runtime.cgocall(0x464890, 0xc000067f60) /usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c main._Cfunc_core_logic() _cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5 main.coreLogic() /container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737 main.main() /container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777 runtime.main() /usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1 goroutine 2 [force gc (idle)]: runtime.gopark(0x47a880, 0x6cb4d0, 0x11, 0x14, 0x1) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?) /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a runtime.forcegchelper() /usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1 created by runtime.init.6 /usr/local/go/src/runtime/proc.go:293 +0x25 goroutine 3 [GC sweep wait]: runtime.gopark(0x47a880, 0x6cb640, 0xc, 0x14, 0x1) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?) 
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a runtime.bgsweep(0x0?) /usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998 runtime.gcenable.func1() /usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1 created by runtime.gcenable /usr/local/go/src/runtime/mgc.go:178 +0x6b goroutine 4 [GC scavenge wait]: runtime.gopark(0x47a880, 0x6cb6c0, 0xd, 0x14, 0x2) /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd runtime.goparkunlock(0x47caa0?, 0x1?, 0x0?, 0x0?) /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a runtime.(*scavengerState).park(0x6cb6c0) /usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b runtime.bgscavenge(0x0?) /usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25 runtime.gcenable.func2() /usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1 created by runtime.gcenable /usr/local/go/src/runtime/mgc.go:179 +0xaa </code>
GDB(带 malloc 堆栈):
此时 runtime.sigpanic 会调用 runtime.morestack_noctxt,而由于当前的 g 是 g0,最终以 “fatal: morestack on g0” 终止。
<code>(gdb) b runtime.sigpanic Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822. (gdb) r Starting program: /container_share/works/badstack/main [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". [New Thread 0x7fffd05d3700 (LWP 213229)] [New Thread 0x7fffcfdd2700 (LWP 213230)] [New Thread 0x7fffcf5d1700 (LWP 213231)] [New Thread 0x7fffcedd0700 (LWP 213232)] [New Thread 0x7fffce58f700 (LWP 213233)] [New Thread 0x7fffcdd8e700 (LWP 213234)] Thread 1 "main" received signal SIGSEGV, Segmentation fault. 0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18 18 *ptr = 1024; (gdb) c Continuing. Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822 822 func sigpanic() { (gdb) p $rsp $1 = (void *) 0x7fffcd58cfe8 (gdb) x/x $r14+0x10 0x6cb770 <runtime.g0+16>: 0xff7fed70 (gdb) c Continuing. fatal: morestack on g0 </code>
GDB(带有线程堆栈):
这种情况下一切似乎都符合预期。
<code>(gdb) b runtime.sigpanic Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822. (gdb) r Starting program: /container_share/works/badstack/main [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". [New Thread 0x7fffd05d3700 (LWP 214288)] [New Thread 0x7fffcfdd2700 (LWP 214289)] [New Thread 0x7fffcf5d1700 (LWP 214290)] [New Thread 0x7fffcedd0700 (LWP 214291)] [New Thread 0x7fffce5cf700 (LWP 214292)] Thread 1 "main" received signal SIGSEGV, Segmentation fault. 0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18 18 *ptr = 1024; (gdb) c Continuing. Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822 822 func sigpanic() { (gdb) p $rsp $1 = (void *) 0x7fffffffd8e8 (gdb) x/x $r14+0x10 0x6cb770 <runtime.g0+16>: 0xff7fed70 (gdb) c Continuing. fatal error: unexpected signal during runtime execution [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0] </code>
环境:
❯ clang -v clang version 16.0.6 (Red Hat 16.0.6-2.module_el8+588+6f71ce7b) ❯ gcc -v gcc version 8.4.1 20200928 (Red Hat 8.4.1-1) (GCC) ❯ uname -a Linux 6cc94b77abd7 6.4.16-orbstack-00103-g02b40eb69695 #1 SMP Wed Sep 13 10:13:30 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
复现代码:
编译: CC=clang CXX=clang++ CFLAGS="-g -O0" go build -gcflags="all=-N -l" main.go
package main /* #include <stdio.h> #include <stddef.h> #include <ucontext.h> #include <stdlib.h> static ucontext_t uctx_main, uctx_core; void core() { // core logic // trigger crash int* ptr = NULL; *ptr = 1024; } void core_logic() { size_t size = 1024 * 1024; char stack[size]; // SIGSEGV //void* stack = malloc(size); // SIGTRAP if (getcontext(&uctx_core) == -1) printf("failed to getcontext"); uctx_core.uc_stack.ss_sp = stack; uctx_core.uc_stack.ss_size = size; uctx_core.uc_link = &uctx_main; makecontext(&uctx_core, core, 0); if (swapcontext(&uctx_main, &uctx_core) == -1) printf("failed to swapcontext"); printf("back\n"); } */ // #cgo CFLAGS: -g -O0 import "C" func coreLogic() { C.core_logic() } func main() { // Call the C function from Go coreLogic() }
以下是我的猜测,但似乎并不正确:当我使用堆上分配的堆栈时,运行时认为发生了堆栈溢出,需要扩展堆栈(morestack),但随后发现当前是 g0,于是直接致命退出。不过看起来 goroutine 堆栈的地址本来就比线程堆栈的地址低得多?
更新于2023年9月24日:
对于纯 C 程序,无论我使用什么堆栈,它都会获得 SIGSEGV。
<code>#include <stdio.h> #include <stddef.h> #include <ucontext.h> #include <stdlib.h> static ucontext_t uctx_main, uctx_core; void core() { // core logic // trigger crash int* ptr = NULL; *ptr = 1024; } void core_logic() { size_t size = 100 * 1024 * 1024; //char stack[size]; // SIGSEGV void* stack = malloc(size); // SIGTRAP if (getcontext(&uctx_core) == -1) printf("failed to getcontext"); uctx_core.uc_stack.ss_sp = stack; uctx_core.uc_stack.ss_size = size; uctx_core.uc_link = &uctx_main; makecontext(&uctx_core, core, 0); if (swapcontext(&uctx_main, &uctx_core) == -1) printf("failed to swapcontext"); printf("back\n"); } void coreLogic() { core_logic(); } int main() { coreLogic(); return 0; } </code>
最后,我在 Golang 的 GitHub 仓库中提交了一个 issue,并在 Go 团队成员的帮助下解决了这个问题;如果您需要,可以查阅该 issue。
TL;DR:这是从 Go 1.21 开始出现的一个 bug,应该会在 Go 1.22 中修复。即使您使用的是 Go 1.20,由于另一个 bug,您仍然可能遇到这个问题。如果有人需要,我也许稍后会发布更多详细信息;您也可以在前面提到的 issue 中查看更多细节。
以上就是 Golang+CGO 使用 ucontext 并在使用不同堆栈时(故意)因 SIGSEGV 或 SIGTRAP 崩溃的详细内容。更多信息请关注 PHP 中文网的其他相关文章!