Make shutdown handling predictable
This commit is contained in:
parent
559886f297
commit
d847be21c6
18
daemon.go
18
daemon.go
|
@ -52,11 +52,14 @@ type BaseDaemon struct {
|
|||
name string
|
||||
stats *statistics
|
||||
queue chan<- *task
|
||||
panicHandler func()
|
||||
panicHandler PanicHandler
|
||||
shutdown chan struct{}
|
||||
limit *ratelimit.Bucket
|
||||
}
|
||||
|
||||
// PanicHandler is a function that handles panics. Duh!
|
||||
type PanicHandler func(interface{})
|
||||
|
||||
var (
|
||||
errMissingSubscriptionFun = errors.New("subscription function is not set up")
|
||||
errMissingPublisher = errors.New("publisher is not set up")
|
||||
|
@ -88,7 +91,7 @@ func (d *BaseDaemon) SystemProcess(name string, a Actor) {
|
|||
|
||||
// Subscribe subscriasdsdfsdgdfgdfsg sdgsdfg sdfgs dfgdfgdfg.
|
||||
func (d *BaseDaemon) Subscribe(topic string, fun interface{}) {
|
||||
name := fmt.Sprintf("%s subscription for topic %q", d.String(), topic)
|
||||
name := fmt.Sprintf("Subscription for topic %q", topic)
|
||||
d.SystemProcess(name, func() {
|
||||
if d.subscribeFunc == nil {
|
||||
panic(errMissingSubscriptionFun)
|
||||
|
@ -134,7 +137,7 @@ func (d *BaseDaemon) LimitRate(times int, per time.Duration) {
|
|||
}
|
||||
|
||||
// HandlePanics sets up a panic handler function for the daemon.
|
||||
func (d *BaseDaemon) HandlePanics(f func()) {
|
||||
func (d *BaseDaemon) HandlePanics(f PanicHandler) {
|
||||
d.panicHandler = f
|
||||
}
|
||||
|
||||
|
@ -169,10 +172,15 @@ func (d *BaseDaemon) base() *BaseDaemon {
|
|||
return d
|
||||
}
|
||||
|
||||
func (d *BaseDaemon) handlePanic(err interface{}) {
|
||||
func (d *BaseDaemon) handlePanic() {
|
||||
err := recover()
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
d.stats.registerError()
|
||||
if d.panicHandler != nil {
|
||||
d.panicHandler()
|
||||
d.panicHandler(err)
|
||||
}
|
||||
log.Printf("Daemon %s recovered from a panic\nError: %v\n", d, err)
|
||||
debug.PrintStack()
|
||||
|
|
|
@ -15,12 +15,12 @@ type NumberPrinter struct {
|
|||
|
||||
// Startup sets up panic handler and starts enqueuing number printing jobs.
|
||||
func (n *NumberPrinter) Startup() {
|
||||
n.HandlePanics(func() {
|
||||
log.Println("Oh, crap! There was a panic, take a look:")
|
||||
n.HandlePanics(func(err interface{}) {
|
||||
log.Printf("Oh, crap! There was a panic, take a look: %v", err)
|
||||
})
|
||||
|
||||
n.SystemProcess("Random Number Generator", n.generateNumbers)
|
||||
n.LimitRate(1, 2*time.Second)
|
||||
n.SystemProcess("Random Number Generator", n.generateNumbers)
|
||||
}
|
||||
|
||||
// Shutdown is empty due to the lack of cleanup.
|
||||
|
|
|
@ -19,10 +19,10 @@ type PriceUpdate struct {
|
|||
|
||||
// Startup creates a new subscription for ProductPriceUpdates topic.
|
||||
func (p *PriceConsumer) Startup() {
|
||||
p.LimitRate(1, 500*time.Millisecond)
|
||||
b.Subscribe("ProductPriceUpdates", func(u PriceUpdate) {
|
||||
log.Printf("Price for %q is now $%.2f", u.Product, u.Amount)
|
||||
})
|
||||
p.LimitRate(1, 500*time.Millisecond)
|
||||
}
|
||||
|
||||
// Shutdown is empty because PriceConsumer requires no cleanup upon exiting.
|
||||
|
|
|
@ -88,6 +88,7 @@ func Subscribe(consumer, topic string) satan.Streamer {
|
|||
case err := <-pc.Errors():
|
||||
log.Println("Kafka error:", err.Error())
|
||||
case <-stream.shutdown:
|
||||
pc.Close()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
|
104
satan.go
104
satan.go
|
@ -1,7 +1,9 @@
|
|||
package satan
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
@ -11,11 +13,14 @@ type Satan struct {
|
|||
SubscribeFunc SubscribeFunc
|
||||
Publisher Publisher
|
||||
|
||||
daemons []Daemon
|
||||
queue chan *task
|
||||
shutdown chan struct{}
|
||||
wg sync.WaitGroup
|
||||
latency *statistics
|
||||
daemons []Daemon
|
||||
queue chan *task
|
||||
latency *statistics
|
||||
|
||||
wgWorkers sync.WaitGroup
|
||||
wgSystem sync.WaitGroup
|
||||
shutdownWorkers chan struct{}
|
||||
shutdownSystem chan struct{}
|
||||
}
|
||||
|
||||
// Actor is a function that could be executed by daemon workers.
|
||||
|
@ -38,6 +43,14 @@ type Publisher interface {
|
|||
Close()
|
||||
}
|
||||
|
||||
type task struct {
|
||||
daemon Daemon
|
||||
actor Actor
|
||||
createdAt time.Time
|
||||
system bool
|
||||
name string
|
||||
}
|
||||
|
||||
const (
|
||||
defaultNumWorkers = 10
|
||||
)
|
||||
|
@ -45,9 +58,10 @@ const (
|
|||
// Summon creates a new instance of Satan.
|
||||
func Summon() *Satan {
|
||||
return &Satan{
|
||||
queue: make(chan *task),
|
||||
latency: newStatistics(),
|
||||
shutdown: make(chan struct{}),
|
||||
queue: make(chan *task),
|
||||
latency: newStatistics(),
|
||||
shutdownWorkers: make(chan struct{}),
|
||||
shutdownSystem: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,7 +72,7 @@ func (s *Satan) AddDaemon(d Daemon) {
|
|||
base.subscribeFunc = s.SubscribeFunc
|
||||
base.publisher = s.Publisher
|
||||
base.queue = s.queue
|
||||
base.shutdown = make(chan struct{})
|
||||
base.shutdown = s.shutdownSystem
|
||||
base.stats = newStatistics()
|
||||
|
||||
go d.Startup()
|
||||
|
@ -67,50 +81,94 @@ func (s *Satan) AddDaemon(d Daemon) {
|
|||
|
||||
// StartDaemons starts all registered daemons.
|
||||
func (s *Satan) StartDaemons() {
|
||||
s.wg.Add(defaultNumWorkers)
|
||||
for i := 0; i < defaultNumWorkers; i++ {
|
||||
go func(i int) {
|
||||
s.runWorker(i)
|
||||
s.wg.Done()
|
||||
}(i)
|
||||
}
|
||||
}
|
||||
|
||||
// StopDaemons stops all running daemons.
|
||||
func (s *Satan) StopDaemons() {
|
||||
close(s.shutdownSystem)
|
||||
for _, d := range s.daemons {
|
||||
close(d.base().shutdown)
|
||||
d.Shutdown()
|
||||
}
|
||||
|
||||
s.wgSystem.Wait()
|
||||
close(s.shutdownWorkers)
|
||||
s.wgWorkers.Wait()
|
||||
close(s.queue)
|
||||
|
||||
for _, d := range s.daemons {
|
||||
stats := d.base().stats.snapshot()
|
||||
log.Printf("%s daemon performace statistics:\n%s\n", d.base(), stats)
|
||||
}
|
||||
|
||||
close(s.shutdown)
|
||||
s.wg.Wait()
|
||||
close(s.queue)
|
||||
|
||||
log.Printf("Task processing latency statistics:\n%s\n", s.latency.snapshot())
|
||||
}
|
||||
|
||||
func (s *Satan) runWorker(i int) {
|
||||
s.wgWorkers.Add(1)
|
||||
defer s.wgWorkers.Done()
|
||||
log.Printf("Starting worker #%d", i+1)
|
||||
defer log.Printf("Worker #%d has stopped", i+1)
|
||||
|
||||
for {
|
||||
select {
|
||||
case t := <-s.queue:
|
||||
dur := time.Now().UnixNano() - t.createdAt.UnixNano()
|
||||
s.latency.add(time.Duration(dur))
|
||||
if restart := t.process(); restart {
|
||||
s.queue <- t
|
||||
}
|
||||
s.processTask(t)
|
||||
default:
|
||||
select {
|
||||
case <-s.shutdown:
|
||||
case <-s.shutdownWorkers:
|
||||
return
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Satan) processTask(t *task) {
|
||||
dur := time.Now().UnixNano() - t.createdAt.UnixNano()
|
||||
s.latency.add(time.Duration(dur))
|
||||
|
||||
if t.system {
|
||||
s.processSystemTask(t)
|
||||
} else {
|
||||
s.processGeneralTask(t)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Satan) processSystemTask(t *task) {
|
||||
s.wgSystem.Add(1)
|
||||
defer s.wgSystem.Done()
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
log.Printf("System task %s recovered from a panic\nError: %v\n", t, err)
|
||||
debug.PrintStack()
|
||||
s.queue <- t // Restarting task
|
||||
} else {
|
||||
log.Printf("System task %s has stopped\n", t)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Printf("Starting system task %s\n", t)
|
||||
t.actor() // <--- THE ACTION HAPPENS HERE
|
||||
}
|
||||
|
||||
func (s *Satan) processGeneralTask(t *task) {
|
||||
defer t.daemon.base().handlePanic()
|
||||
defer func(start time.Time) {
|
||||
dur := time.Now().UnixNano() - start.UnixNano()
|
||||
t.daemon.base().stats.add(time.Duration(dur))
|
||||
}(time.Now())
|
||||
|
||||
t.actor() // <--- THE ACTION HAPPENS HERE
|
||||
}
|
||||
|
||||
func (t *task) String() string {
|
||||
if t.name == "" {
|
||||
return fmt.Sprintf("[unnamed %s process]", t.daemon.base())
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s[%s]", t.daemon.base(), t.name)
|
||||
}
|
||||
|
|
44
task.go
44
task.go
|
@ -1,44 +0,0 @@
|
|||
package satan
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
|
||||
type task struct {
|
||||
daemon Daemon
|
||||
actor Actor
|
||||
createdAt time.Time
|
||||
system bool
|
||||
name string
|
||||
}
|
||||
|
||||
func (t *task) process() (restart bool) {
|
||||
defer func(start time.Time) {
|
||||
dur := time.Now().UnixNano() - start.UnixNano()
|
||||
t.daemon.base().stats.add(time.Duration(dur))
|
||||
|
||||
if err := recover(); err != nil {
|
||||
if t.system {
|
||||
log.Printf("System process %s recovered from a panic\nError: %v\n", t, err)
|
||||
debug.PrintStack()
|
||||
restart = true
|
||||
} else {
|
||||
t.daemon.base().handlePanic(err)
|
||||
}
|
||||
}
|
||||
}(time.Now())
|
||||
|
||||
t.actor() // <--- THE ACTION HAPPENS HERE
|
||||
return
|
||||
}
|
||||
|
||||
func (t *task) String() string {
|
||||
if t.name == "" {
|
||||
return fmt.Sprintf("[unnamed %s process]", t.daemon.base())
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s[%s]", t.daemon.base(), t.name)
|
||||
}
|
Loading…
Reference in New Issue