feat: adapt new logging to workflow node processors

Fu Diwei
2025-03-17 22:50:25 +08:00
parent b620052b88
commit af5d7465a1
22 changed files with 714 additions and 274 deletions
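
The dispatcher now takes a workflowLogRepository as a third constructor argument and hands it to the workflow invoker in place of the run repository. A minimal wiring sketch follows; the repository constructor names and their package are assumptions for illustration only, not part of this commit.

	// Sketch only: NewWorkflowRepository, NewWorkflowRunRepository and
	// NewWorkflowLogRepository are hypothetical constructors standing in for
	// however the repositories are actually built in this codebase.
	dispatcher := newWorkflowDispatcher(
		repository.NewWorkflowRepository(),
		repository.NewWorkflowRunRepository(),
		repository.NewWorkflowLogRepository(), // new dependency introduced by this commit
	)
	dispatcher.Dispatch(&WorkflowWorkerData{WorkflowId: "wf-123", RunId: "run-456"})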


@@ -51,9 +51,10 @@ type WorkflowDispatcher struct {
 	workflowRepo workflowRepository
 	workflowRunRepo workflowRunRepository
+	workflowLogRepo workflowLogRepository
 }

-func newWorkflowDispatcher(workflowRepo workflowRepository, workflowRunRepo workflowRunRepository) *WorkflowDispatcher {
+func newWorkflowDispatcher(workflowRepo workflowRepository, workflowRunRepo workflowRunRepository, workflowLogRepo workflowLogRepository) *WorkflowDispatcher {
 	dispatcher := &WorkflowDispatcher{
 		semaphore: make(chan struct{}, maxWorkers),
@@ -69,6 +70,7 @@ func newWorkflowDispatcher(workflowRepo workflowRepository, workflowRunRepo work
 		workflowRepo: workflowRepo,
 		workflowRunRepo: workflowRunRepo,
+		workflowLogRepo: workflowLogRepo,
 	}

 	go func() {
@@ -86,139 +88,139 @@ func newWorkflowDispatcher(workflowRepo workflowRepository, workflowRunRepo work
 	return dispatcher
 }

-func (w *WorkflowDispatcher) Dispatch(data *WorkflowWorkerData) {
+func (d *WorkflowDispatcher) Dispatch(data *WorkflowWorkerData) {
 	if data == nil {
 		panic("worker data is nil")
 	}

-	w.enqueueWorker(data)
+	d.enqueueWorker(data)

 	select {
-	case w.chWork <- data:
+	case d.chWork <- data:
 	default:
 	}
 }

-func (w *WorkflowDispatcher) Cancel(runId string) {
+func (d *WorkflowDispatcher) Cancel(runId string) {
 	hasWorker := false

 	// Cancel the WorkflowRun that is currently executing
-	w.workerMutex.Lock()
-	if workflowId, ok := w.workerIdMap[runId]; ok {
-		if worker, ok := w.workers[workflowId]; ok {
+	d.workerMutex.Lock()
+	if workflowId, ok := d.workerIdMap[runId]; ok {
+		if worker, ok := d.workers[workflowId]; ok {
 			hasWorker = true
 			worker.Cancel()
-			delete(w.workers, workflowId)
-			delete(w.workerIdMap, runId)
+			delete(d.workers, workflowId)
+			delete(d.workerIdMap, runId)
 		}
 	}
-	w.workerMutex.Unlock()
+	d.workerMutex.Unlock()

 	// Remove the WorkflowRun from the queue
-	w.queueMutex.Lock()
-	w.queue = slices.Filter(w.queue, func(d *WorkflowWorkerData) bool {
+	d.queueMutex.Lock()
+	d.queue = slices.Filter(d.queue, func(d *WorkflowWorkerData) bool {
 		return d.RunId != runId
 	})
-	w.queueMutex.Unlock()
+	d.queueMutex.Unlock()

 	// Not yet picked up by a worker: look up the WorkflowRun and update its status to Canceled
 	if !hasWorker {
-		if run, err := w.workflowRunRepo.GetById(context.Background(), runId); err == nil {
+		if run, err := d.workflowRunRepo.GetById(context.Background(), runId); err == nil {
 			if run.Status == domain.WorkflowRunStatusTypePending || run.Status == domain.WorkflowRunStatusTypeRunning {
 				run.Status = domain.WorkflowRunStatusTypeCanceled
-				w.workflowRunRepo.Save(context.Background(), run)
+				d.workflowRunRepo.Save(context.Background(), run)
 			}
 		}
 	}
 }

-func (w *WorkflowDispatcher) Shutdown() {
+func (d *WorkflowDispatcher) Shutdown() {
 	// Clear the queued WorkflowRuns
-	w.queueMutex.Lock()
-	w.queue = make([]*WorkflowWorkerData, 0)
-	w.queueMutex.Unlock()
+	d.queueMutex.Lock()
+	d.queue = make([]*WorkflowWorkerData, 0)
+	d.queueMutex.Unlock()

 	// Wait for all executing WorkflowRuns to finish
-	w.workerMutex.Lock()
-	for _, worker := range w.workers {
+	d.workerMutex.Lock()
+	for _, worker := range d.workers {
 		worker.Cancel()
-		delete(w.workers, worker.Data.WorkflowId)
-		delete(w.workerIdMap, worker.Data.RunId)
+		delete(d.workers, worker.Data.WorkflowId)
+		delete(d.workerIdMap, worker.Data.RunId)
 	}
-	w.workerMutex.Unlock()
-	w.wg.Wait()
+	d.workerMutex.Unlock()
+	d.wg.Wait()
 }

-func (w *WorkflowDispatcher) enqueueWorker(data *WorkflowWorkerData) {
-	w.queueMutex.Lock()
-	defer w.queueMutex.Unlock()
-	w.queue = append(w.queue, data)
+func (d *WorkflowDispatcher) enqueueWorker(data *WorkflowWorkerData) {
+	d.queueMutex.Lock()
+	defer d.queueMutex.Unlock()
+	d.queue = append(d.queue, data)
 }

-func (w *WorkflowDispatcher) dequeueWorker() {
+func (d *WorkflowDispatcher) dequeueWorker() {
 	for {
 		select {
-		case w.semaphore <- struct{}{}:
+		case d.semaphore <- struct{}{}:
 		default:
 			// Maximum concurrency reached
 			return
 		}

-		w.queueMutex.Lock()
-		if len(w.queue) == 0 {
-			w.queueMutex.Unlock()
-			<-w.semaphore
+		d.queueMutex.Lock()
+		if len(d.queue) == 0 {
+			d.queueMutex.Unlock()
+			<-d.semaphore
 			return
 		}

-		data := w.queue[0]
-		w.queue = w.queue[1:]
-		w.queueMutex.Unlock()
+		data := d.queue[0]
+		d.queue = d.queue[1:]
+		d.queueMutex.Unlock()

 		// Check whether a WorkflowRun with the same WorkflowId is already executing.
 		// If so, re-queue this one so that only one run of a given workflow executes at a time,
 		// i.e. runs with different WorkflowIds run in parallel, runs with the same WorkflowId run serially.
-		w.workerMutex.Lock()
-		if _, exists := w.workers[data.WorkflowId]; exists {
-			w.queueMutex.Lock()
-			w.queue = append(w.queue, data)
-			w.queueMutex.Unlock()
-			w.workerMutex.Unlock()
+		d.workerMutex.Lock()
+		if _, exists := d.workers[data.WorkflowId]; exists {
+			d.queueMutex.Lock()
+			d.queue = append(d.queue, data)
+			d.queueMutex.Unlock()
+			d.workerMutex.Unlock()

-			<-w.semaphore
+			<-d.semaphore
 			continue
 		}

 		ctx, cancel := context.WithCancel(context.Background())
-		w.workers[data.WorkflowId] = &workflowWorker{data, cancel}
-		w.workerIdMap[data.RunId] = data.WorkflowId
-		w.workerMutex.Unlock()
+		d.workers[data.WorkflowId] = &workflowWorker{data, cancel}
+		d.workerIdMap[data.RunId] = data.WorkflowId
+		d.workerMutex.Unlock()

-		w.wg.Add(1)
-		go w.work(ctx, data)
+		d.wg.Add(1)
+		go d.work(ctx, data)
 	}
 }

-func (w *WorkflowDispatcher) work(ctx context.Context, data *WorkflowWorkerData) {
+func (d *WorkflowDispatcher) work(ctx context.Context, data *WorkflowWorkerData) {
 	defer func() {
-		<-w.semaphore
-		w.workerMutex.Lock()
-		delete(w.workers, data.WorkflowId)
-		delete(w.workerIdMap, data.RunId)
-		w.workerMutex.Unlock()
+		<-d.semaphore
+		d.workerMutex.Lock()
+		delete(d.workers, data.WorkflowId)
+		delete(d.workerIdMap, data.RunId)
+		d.workerMutex.Unlock()

-		w.wg.Done()
+		d.wg.Done()

 		// Try to pick up another queued WorkflowRun and keep going
 		select {
-		case w.chCandi <- struct{}{}:
+		case d.chCandi <- struct{}{}:
 		default:
 		}
 	}()

 	// Look up the WorkflowRun
-	run, err := w.workflowRunRepo.GetById(ctx, data.RunId)
+	run, err := d.workflowRunRepo.GetById(ctx, data.RunId)
 	if err != nil {
 		if !(errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) {
 			app.GetLogger().Error(fmt.Sprintf("failed to get workflow run #%s", data.RunId), "err", err)
@@ -228,13 +230,13 @@ func (w *WorkflowDispatcher) work(ctx context.Context, data *WorkflowWorkerData)
 		return
 	} else if ctx.Err() != nil {
 		run.Status = domain.WorkflowRunStatusTypeCanceled
-		w.workflowRunRepo.Save(ctx, run)
+		d.workflowRunRepo.Save(ctx, run)
 		return
 	}

 	// Update the WorkflowRun status to Running
 	run.Status = domain.WorkflowRunStatusTypeRunning
-	if _, err := w.workflowRunRepo.Save(ctx, run); err != nil {
+	if _, err := d.workflowRunRepo.Save(ctx, run); err != nil {
 		if !(errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) {
 			panic(err)
 		}
@@ -242,19 +244,17 @@ func (w *WorkflowDispatcher) work(ctx context.Context, data *WorkflowWorkerData)
 	}

 	// Execute the workflow
-	invoker := newWorkflowInvokerWithData(w.workflowRunRepo, data)
+	invoker := newWorkflowInvokerWithData(d.workflowLogRepo, data)
 	if runErr := invoker.Invoke(ctx); runErr != nil {
 		if errors.Is(runErr, context.Canceled) {
 			run.Status = domain.WorkflowRunStatusTypeCanceled
-			run.Logs = invoker.GetLogs()
 		} else {
 			run.Status = domain.WorkflowRunStatusTypeFailed
 			run.EndedAt = time.Now()
-			run.Logs = invoker.GetLogs()
 			run.Error = runErr.Error()
 		}

-		if _, err := w.workflowRunRepo.Save(ctx, run); err != nil {
+		if _, err := d.workflowRunRepo.Save(ctx, run); err != nil {
 			if !(errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) {
 				panic(err)
 			}
@@ -265,14 +265,13 @@ func (w *WorkflowDispatcher) work(ctx context.Context, data *WorkflowWorkerData)
 	// Update the WorkflowRun status to Succeeded/Failed
 	run.EndedAt = time.Now()
-	run.Logs = invoker.GetLogs()
-	run.Error = domain.WorkflowRunLogs(invoker.GetLogs()).ErrorString()
+	run.Error = invoker.GetLogs().ErrorString()
 	if run.Error == "" {
 		run.Status = domain.WorkflowRunStatusTypeSucceeded
 	} else {
 		run.Status = domain.WorkflowRunStatusTypeFailed
 	}

-	if _, err := w.workflowRunRepo.Save(ctx, run); err != nil {
+	if _, err := d.workflowRunRepo.Save(ctx, run); err != nil {
 		if !(errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) {
 			panic(err)
 		}
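
For readers skimming the dequeue loop above: the scheduling rule it implements is a counting semaphore that caps total concurrency, plus a per-key guard that serializes runs sharing the same WorkflowId. The following stripped-down, self-contained sketch shows that rule in isolation; the names are illustrative only, and it uses a blocking semaphore send where the dispatcher uses a non-blocking select.

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	func main() {
		const maxWorkers = 3
		semaphore := make(chan struct{}, maxWorkers) // caps total concurrency
		running := map[string]bool{}                 // workflow keys currently executing
		var mu sync.Mutex
		var wg sync.WaitGroup

		// Runs sharing a key must execute serially; different keys may run in parallel.
		queue := []string{"wf-a", "wf-a", "wf-b", "wf-c", "wf-a"}

		for len(queue) > 0 {
			key := queue[0]
			queue = queue[1:]

			mu.Lock()
			if running[key] {
				// Same key already executing: put it back and try again later.
				queue = append(queue, key)
				mu.Unlock()
				time.Sleep(10 * time.Millisecond) // keep this toy loop from spinning
				continue
			}
			running[key] = true
			mu.Unlock()

			semaphore <- struct{}{} // blocks once maxWorkers runs are in flight
			wg.Add(1)
			go func(k string) {
				defer func() {
					mu.Lock()
					delete(running, k)
					mu.Unlock()
					<-semaphore
					wg.Done()
				}()
				fmt.Println("running", k)
				time.Sleep(50 * time.Millisecond) // stand-in for invoking the workflow
			}(key)
		}
		wg.Wait()
	}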