golang schedule crash

时间:2023-03-09 16:04:00
golang schedule crash

golang 起超过100W的goroutine就会crash,这个算不算是个bug?


2036119xxxx
panic: inconsistent poll.fdMutex

goroutine 2041065 [running]:
internal/poll.(*fdMutex).rwlock(0xc420090050, 0xc517305500, 0x1089b36)
/usr/local/Cellar/go/1.10/libexec/src/internal/poll/fd_mutex.go:145 +0x13f
internal/poll.(*FD).writeLock(0xc420090050, 0x1, 0x10c1cee)
/usr/local/Cellar/go/1.10/libexec/src/internal/poll/fd_mutex.go:237 +0x32
internal/poll.(*FD).Write(0xc420090050, 0xc518138ee0, 0x7, 0x8, 0x0, 0x0, 0x0)
/usr/local/Cellar/go/1.10/libexec/src/internal/poll/fd_unix.go:243 +0x46
os.(*File).write(0xc42000c018, 0xc518138ee0, 0x7, 0x8, 0x0, 0x0, 0x0)
/usr/local/Cellar/go/1.10/libexec/src/os/file_unix.go:243 +0x4e
os.(*File).Write(0xc42000c018, 0xc518138ee0, 0x7, 0x8, 0x8, 0xc517a3bd50, 0x0)
/usr/local/Cellar/go/1.10/libexec/src/os/file.go:144 +0x6f
fmt.Fprint(0x10d2720, 0xc42000c018, 0xc5173057b8, 0x1, 0x1, 0x109ed40, 0x0, 0xc518138ed8)
/usr/local/Cellar/go/1.10/libexec/src/fmt/print.go:223 +0x8b
fmt.Print(0xc5173057b8, 0x1, 0x1, 0xc518138ed8, 0x0, 0x0)
/usr/local/Cellar/go/1.10/libexec/src/fmt/print.go:232 +0x57
main.test(0x1f24c1)
/Users/guanshuai/go/src/main.go:11 +0x81
created by main.main
/Users/guanshuai/go/src/main.go:19 +0x87
exit status 2

real 0m10.859s
user 0m29.635s
sys 0m10.859s
[guanshuai] src $


mac os

goroutine 15192446 [running]:
internal/poll.(*fdMutex).rwlock(0xc420080050, 0x1000000010bf000, 0x0)
/usr/local/go/src/internal/poll/fd_mutex.go:145 +0x13f
internal/poll.(*FD).writeLock(0xc420080050, 0xc420034800, 0xc4ddd4cc40)
/usr/local/go/src/internal/poll/fd_mutex.go:237 +0x32
internal/poll.(*FD).Write(0xc420080050, 0xc4ddcea480, 0x11, 0x20, 0x0, 0x0, 0x0)
/usr/local/go/src/internal/poll/fd_unix.go:243 +0x46
os.(*File).write(0xc42000c018, 0xc4ddcea480, 0x11, 0x20, 0x10, 0x10, 0x11)
/usr/local/go/src/os/file_unix.go:243 +0x4e
os.(*File).Write(0xc42000c018, 0xc4ddcea480, 0x11, 0x20, 0x8, 0xc4531795a0, 0x0)
/usr/local/go/src/os/file.go:144 +0x6f
fmt.Fprintln(0x10d09a0, 0xc42000c018, 0xc4ddd7e7a8, 0x2, 0x2, 0x109d300, 0x0, 0xc4531795a0)
/usr/local/go/src/fmt/print.go:255 +0x8b
fmt.Println(0xc4ddd7e7a8, 0x2, 0x2, 0xc4531795a0, 0x0, 0x0)
/usr/local/go/src/fmt/print.go:264 +0x57

  

// lock adds a reference to mu and locks mu.
// It reports whether mu is available for reading or writing.
func (mu *fdMutex) rwlock(read bool) bool {
var mutexBit, mutexWait, mutexMask uint64
var mutexSema *uint32
if read {
mutexBit = mutexRLock
mutexWait = mutexRWait
mutexMask = mutexRMask
mutexSema = &mu.rsema
} else {
mutexBit = mutexWLock
mutexWait = mutexWWait
mutexMask = mutexWMask
mutexSema = &mu.wsema
}
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexClosed != 0 {
return false
}
var new uint64
if old&mutexBit == 0 {
// Lock is free, acquire it.
new = (old | mutexBit) + mutexRef
if new&mutexRefMask == 0 {
panic("inconsistent poll.fdMutex")
}
} else {
// Wait for lock.
new = old + mutexWait
if new&mutexMask == 0 {
panic("inconsistent poll.fdMutex")
}
}
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
if old&mutexBit == 0 {
return true
}
runtime_Semacquire(mutexSema)
// The signaller has subtracted mutexWait.
}
}
}

  

// 1 bit - lock for write operations.
// 20 bits - total number of references (read+write+misc).
// 20 bits - number of outstanding read waiters.
// 20 bits - number of outstanding write waiter

// fdMutex.state is organized as follows:
// 1 bit - whether FD is closed, if set all subsequent lock operations will fail.
// 1 bit - lock for read operations.
// 1 bit - lock for write operations.
// 20 bits - total number of references (read+write+misc).
// 20 bits - number of outstanding read waiters.
// 20 bits - number of outstanding write waiters.
const (
mutexClosed = 1 << 0
mutexRLock = 1 << 1
mutexWLock = 1 << 2
mutexRef = 1 << 3
mutexRefMask = (1<<20 - 1) << 3
mutexRWait = 1 << 23
mutexRMask = (1<<20 - 1) << 23
mutexWWait = 1 << 43
mutexWMask = (1<<20 - 1) << 43
)

互斥的变量里面是64位的数值

1.bit 代表是否关闭

2bit 锁的类型是读

3bit 锁的类型是写

4bit 可以递增的单位数值

5bit-- (22位)   掩码   2**22 = 2 * 2 ** 20

>>> import math
>>> pow(2, 20)
1048576
>>>

  在一个file 或者socket上最大的锁为1000000, 所以有人提交patch,提示更精细的panic

https://go-review.googlesource.com/c/go/+/119956

internal/poll: better panic message for lock overflow

const overflowMsg = "too many concurrent operations on a single file or socket (max 1000000)"