From bbfb8aba443e4f72aad6cf55c9b534f2d3428801 Mon Sep 17 00:00:00 2001
From: Eric Rakestraw <eric@rakestraw.us>
Date: Mon, 27 Apr 2026 19:49:25 -0500
Subject: [PATCH] Implemented a module to detect backchannel segments, and
 updated the coalesce module to ignore them when coalescing same-speaker turns

---
 README.md                                |  21 ++++-
 internal/backchannel/backchannel.go      |  60 +++++++++++++
 internal/backchannel/backchannel_test.go | 104 +++++++++++++++++++++++
 internal/builtin/postprocess.go          |  18 ++++
 internal/builtin/registry.go             |   1 +
 internal/cli/merge_test.go               |  87 +++++++++++++++++++
 internal/coalesce/coalesce.go            |  23 ++++-
 internal/coalesce/coalesce_test.go       |  49 +++++++++++
 internal/config/config.go                |   2 +-
 internal/model/model.go                  |   1 +
 10 files changed, 360 insertions(+), 6 deletions(-)
 create mode 100644 internal/backchannel/backchannel.go
 create mode 100644 internal/backchannel/backchannel_test.go

diff --git a/README.md b/README.md
index e4b056e..aca79dc 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Optional flags:
 - `--input-reader`: input reader module. Default: `json-files`.
 - `--output-modules`: comma-separated output modules. Default: `json`.
 - `--preprocessing-modules`: comma-separated preprocessing modules. Default: `validate-raw,normalize-speakers,trim-text`.
-- `--postprocessing-modules`: comma-separated postprocessing modules. Default: `detect-overlaps,resolve-overlaps,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output`.
+- `--postprocessing-modules`: comma-separated postprocessing modules. Default: `detect-overlaps,resolve-overlaps,backchannel,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output`.
 - `--coalesce-gap`: maximum same-speaker gap in seconds for `coalesce`. Default: `3.0`.
 
 ## Input JSON Format
@@ -151,7 +151,7 @@ The merged output uses the current seriatim envelope:
     "input_reader": "json-files",
     "input_files": ["eric.json", "mike.json"],
     "preprocessing_modules": ["validate-raw", "normalize-speakers", "trim-text"],
-    "postprocessing_modules": ["detect-overlaps", "resolve-overlaps", "coalesce", "detect-overlaps", "autocorrect", "assign-ids", "validate-output"],
+    "postprocessing_modules": ["detect-overlaps", "resolve-overlaps", "backchannel", "coalesce", "detect-overlaps", "autocorrect", "assign-ids", "validate-output"],
     "output_modules": ["json"]
   },
   "segments": [
@@ -173,7 +173,8 @@ The merged output uses the current seriatim envelope:
       "speaker": "Eric Rakestraw",
       "start": 2.0,
       "end": 2.5,
-      "text": "Resolved word run"
+      "text": "Resolved word run",
+      "categories": ["backchannel"]
     }
   ],
   "overlap_groups": [
@@ -215,7 +216,7 @@ Overlap behavior:
 
 ## Overlap Resolution
 
-The default postprocessing pipeline runs `detect-overlaps`, then `resolve-overlaps`, then `coalesce`, then a second `detect-overlaps` pass.
+The default postprocessing pipeline runs `detect-overlaps`, then `resolve-overlaps`, then `backchannel`, then `coalesce`, then a second `detect-overlaps` pass.
 
 For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word timing to build smaller word-run replacement segments:
 
@@ -236,12 +237,24 @@ For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word
 - If a speaker has no usable word timing in a group, that speaker's original segment is kept.
 - If no speakers in a group have usable word timing, the original group and annotations remain unchanged.
 
+## Backchannels
+
+The default pipeline runs `backchannel` before `coalesce`. It tags short acknowledgement segments with:
+
+```json
+"categories": ["backchannel"]
+```
+
+Backchannel matching is case-insensitive and ignores surrounding whitespace. A segment is tagged only when its text matches a known acknowledgement phrase (optionally ending in a period), contains no more than three whitespace-delimited words, and lasts no longer than `1.0` second.
+
 ## Coalescing
 
 The default pipeline runs `coalesce` before the second overlap detection pass. It merges adjacent same-speaker segments in the transcript's current order when `next.start - current.end <= --coalesce-gap`.
 
 Coalesced segments use `source_ref` values such as `coalesce:1`, include `derived_from`, and omit `source_segment_index`.
 
+Backchannel segments from a different speaker do not block coalescing of the surrounding same-speaker segments; they are emitted immediately after the coalesced segment they were skipped over. When same-speaker segments are coalesced, any `backchannel` category from the merged inputs is dropped from the coalesced segment.
+
 ## Autocorrect
 
 Autocorrect is included in the default postprocessing pipeline. If `--autocorrect` is omitted, the module leaves transcript text unchanged and records a skip event in the optional report.
diff --git a/internal/backchannel/backchannel.go b/internal/backchannel/backchannel.go
new file mode 100644
index 0000000..5a897e2
--- /dev/null
+++ b/internal/backchannel/backchannel.go
@@ -0,0 +1,60 @@
+package backchannel
+
+import (
+	"regexp"
+	"strings"
+
+	"gitea.maximumdirect.net/eric/seriatim/internal/model"
+)
+
+const Category = "backchannel" // Category is the value Apply appends to a matching segment's Categories.
+
+var patterns = []*regexp.Regexp{
+	regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|uh[- ]huh|mhm|mm-hmm)\.?$`),      // exactly one acknowledgement alternative, optional trailing period
+	regexp.MustCompile(`(?i)^(yeah|yep|right|okay|ok)([,.\s]+(yeah|yep|right|okay|ok))*\.?$`),          // one or more of these tokens separated by runs of commas, periods, or whitespace
+	regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good)\.?$`), // exactly one fixed phrase, optional trailing period
+}
+
+// Apply appends Category to each not-yet-tagged segment that matches accepts; it returns in and the number of tags added.
+func Apply(in model.MergedTranscript) (model.MergedTranscript, int) {
+	var tagged int
+	for i := range in.Segments {
+		// Work through a pointer so the append below updates the element of
+		// in.Segments itself rather than a copy of it.
+		seg := &in.Segments[i]
+		if !matches(*seg) || hasCategory(*seg, Category) {
+			continue
+		}
+		seg.Categories = append(seg.Categories, Category)
+		tagged++
+	}
+	return in, tagged
+}
+
+// matches reports whether segment's trimmed text is non-empty, has at most three fields, spans at most 1.0, and fits a pattern.
+func matches(segment model.Segment) bool {
+	trimmed := strings.TrimSpace(segment.Text)
+	switch {
+	case trimmed == "":
+		return false
+	case len(strings.Fields(trimmed)) > 3:
+		return false
+	case segment.End-segment.Start > 1.0:
+		return false
+	}
+	for _, re := range patterns {
+		if re.MatchString(trimmed) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasCategory reports whether category is one of segment.Categories.
+func hasCategory(segment model.Segment, category string) bool {
+	found := false
+	for i := 0; i < len(segment.Categories) && !found; i++ {
+		found = segment.Categories[i] == category
+	}
+	return found
+}
diff --git a/internal/backchannel/backchannel_test.go b/internal/backchannel/backchannel_test.go
new file mode 100644
index 0000000..291cbbb
--- /dev/null
+++ b/internal/backchannel/backchannel_test.go
@@ -0,0 +1,104 @@
+package backchannel
+
+import (
+	"reflect"
+	"testing"
+
+	"gitea.maximumdirect.net/eric/seriatim/internal/model"
+)
+
+func TestApplyTagsVerySafeBackchannels(t *testing.T) { // one-token inputs, each spanning 0.5
+	for _, phrase := range []string{"yeah", "Yep.", "mmhm", "uh-huh", "mm-hmm"} {
+		t.Run(phrase, func(t *testing.T) {
+			out, count := Apply(transcript(segment(phrase, 1, 1.5)))
+			if count != 1 {
+				t.Fatalf("tagged = %d, want 1", count)
+			}
+			assertCategories(t, out.Segments[0], []string{Category})
+		})
+	}
+}
+
+func TestApplyTagsRepeatedBackchannels(t *testing.T) { // three tokens in one segment spanning 0.8
+	out, count := Apply(transcript(segment("Yeah, okay yep.", 1, 1.8)))
+	if count != 1 {
+		t.Fatalf("tagged = %d, want 1", count)
+	}
+	assertCategories(t, out.Segments[0], []string{Category})
+}
+
+func TestApplyTagsShortAcknowledgements(t *testing.T) { // fixed multi-word phrases, each spanning 0.8
+	for _, phrase := range []string{"i see", "Got it.", "sounds good"} {
+		t.Run(phrase, func(t *testing.T) {
+			out, count := Apply(transcript(segment(phrase, 1, 1.8)))
+			if count != 1 {
+				t.Fatalf("tagged = %d, want 1", count)
+			}
+			assertCategories(t, out.Segments[0], []string{Category})
+		})
+	}
+}
+
+func TestApplyMatchesTrimAwareCaseInsensitive(t *testing.T) { // upper-case text padded with spaces
+	out, count := Apply(transcript(segment("  YES. ", 1, 1.2)))
+	if count != 1 {
+		t.Fatalf("tagged = %d, want 1", count)
+	}
+	assertCategories(t, out.Segments[0], []string{Category})
+}
+
+func TestApplyDoesNotTagNonMatches(t *testing.T) { // three words spanning 0.5: inside both limits, so only the patterns can reject it
+	got, tagged := Apply(transcript(segment("yeah I know", 1, 1.5)))
+	if tagged != 0 {
+		t.Fatalf("tagged = %d, want 0", tagged)
+	}
+	assertCategories(t, got.Segments[0], nil)
+}
+
+func TestApplyRejectsWordCountOverThree(t *testing.T) { // four tokens the repeated-token pattern accepts, so only the word limit can reject it
+	got, tagged := Apply(transcript(segment("yeah okay yep right", 1, 1.5)))
+	if tagged != 0 {
+		t.Fatalf("tagged = %d, want 0", tagged)
+	}
+	assertCategories(t, got.Segments[0], nil)
+}
+
+func TestApplyRejectsDurationOverOneSecond(t *testing.T) { // matching one-word text, but the segment spans 1.1
+	out, count := Apply(transcript(segment("yeah", 1, 2.1)))
+	if count != 0 {
+		t.Fatalf("tagged = %d, want 0", count)
+	}
+	assertCategories(t, out.Segments[0], nil)
+}
+
+func TestApplyPreservesExistingCategoriesAndAvoidsDuplicate(t *testing.T) {
+	pretagged := segment("yeah", 1, 1.2)
+	pretagged.Categories = []string{"manual", Category}
+	// The segment matches but already carries Category, so nothing new may be added.
+	out, count := Apply(transcript(pretagged))
+	if count != 0 {
+		t.Fatalf("tagged = %d, want 0", count)
+	}
+	assertCategories(t, out.Segments[0], []string{"manual", Category})
+}
+
+func transcript(segments ...model.Segment) model.MergedTranscript { // transcript wraps segments in a MergedTranscript with no other field set.
+	return model.MergedTranscript{Segments: segments}
+}
+
+// segment builds an "Alice" segment sourced from "input.json" with the given text and times.
+func segment(text string, start float64, end float64) model.Segment {
+	var s model.Segment
+	s.Source = "input.json"
+	s.Speaker = "Alice"
+	s.Start, s.End = start, end
+	s.Text = text
+	return s
+}
+
+func assertCategories(t *testing.T, segment model.Segment, want []string) { // fails t unless segment.Categories deep-equals want
+	t.Helper()
+	if got := segment.Categories; !reflect.DeepEqual(got, want) {
+		t.Fatalf("categories = %v, want %v", got, want)
+	}
+}
diff --git a/internal/builtin/postprocess.go b/internal/builtin/postprocess.go
index 41e0c74..41913c5 100644
--- a/internal/builtin/postprocess.go
+++ b/internal/builtin/postprocess.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 
 	"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
+	"gitea.maximumdirect.net/eric/seriatim/internal/backchannel"
 	"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
 	"gitea.maximumdirect.net/eric/seriatim/internal/config"
 	"gitea.maximumdirect.net/eric/seriatim/internal/model"
@@ -98,6 +99,23 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
 	}, nil
 }
 
+type backchannelPostprocessor struct{} // backchannelPostprocessor is a field-less postprocessor type.
+
+func (backchannelPostprocessor) Name() string { // Name returns the fixed string "backchannel".
+	return "backchannel"
+}
+
+// Process returns ctx.Err() if it is non-nil; otherwise it runs backchannel.Apply on in and reports how many segments were tagged.
+func (backchannelPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
+	if ctxErr := ctx.Err(); ctxErr != nil {
+		return model.MergedTranscript{}, nil, ctxErr
+	}
+	result, count := backchannel.Apply(in)
+	message := fmt.Sprintf("tagged %d backchannel segment(s)", count)
+	event := report.Info("postprocessing", "backchannel", message)
+	return result, []report.Event{event}, nil
+}
+
 type coalescePostprocessor struct{}
 
 func (coalescePostprocessor) Name() string {
diff --git a/internal/builtin/registry.go b/internal/builtin/registry.go
index c1794f5..7faf232 100644
--- a/internal/builtin/registry.go
+++ b/internal/builtin/registry.go
@@ -13,6 +13,7 @@ func NewRegistry() *pipeline.Registry {
 	registry.RegisterMerger(placeholderMerger{})
 	registry.RegisterPostprocessor(detectOverlaps{})
 	registry.RegisterPostprocessor(resolveOverlaps{})
+	registry.RegisterPostprocessor(backchannelPostprocessor{})
 	registry.RegisterPostprocessor(coalescePostprocessor{})
 	registry.RegisterPostprocessor(assignIDs{})
 	registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
diff --git a/internal/cli/merge_test.go b/internal/cli/merge_test.go
index f8d636e..132e997 100644
--- a/internal/cli/merge_test.go
+++ b/internal/cli/merge_test.go
@@ -90,6 +90,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
 		"placeholder-merger",
 		"detect-overlaps",
 		"resolve-overlaps",
+		"backchannel",
 		"coalesce",
 		"detect-overlaps",
 		"autocorrect",
@@ -585,6 +586,92 @@ func TestMergeCoalesceGapOverridePreventsMerge(t *testing.T) {
 	}
 }
 
+func TestMergeTagsBackchannelSegments(t *testing.T) {
+	tmp := t.TempDir()
+	inputPath := writeJSONFile(t, tmp, "input.json", `{
+		"segments": [
+			{"start": 1, "end": 1.5, "text": " Yeah. "},
+			{"start": 6, "end": 7, "text": "not a backchannel"}
+		]
+	}`)
+	outputPath := filepath.Join(tmp, "merged.json")
+	reportPath := filepath.Join(tmp, "report.json")
+	// No module flags are passed, so the default postprocessing list applies.
+	if err := executeMerge(
+		"--input-file", inputPath,
+		"--output-file", outputPath,
+		"--report-file", reportPath,
+	); err != nil {
+		t.Fatalf("merge failed: %v", err)
+	}
+	// The second segment starts 4.5 after the first ends, beyond the default
+	// coalesce gap of 3.0, presumably so the two inputs are not merged.
+	var merged model.FinalTranscript
+	readJSON(t, outputPath, &merged)
+	if len(merged.Segments) != 2 {
+		t.Fatalf("segment count = %d, want 2", len(merged.Segments))
+	}
+	if !equalStrings(merged.Segments[0].Categories, []string{"backchannel"}) {
+		t.Fatalf("segment categories = %v, want [backchannel]", merged.Segments[0].Categories)
+	}
+	if len(merged.Segments[1].Categories) != 0 {
+		t.Fatalf("unexpected categories = %v", merged.Segments[1].Categories)
+	}
+	// The report must contain the module's event for exactly one tagged segment.
+	var rep report.Report
+	readJSON(t, reportPath, &rep)
+	if !hasReportEvent(rep, "postprocessing", "backchannel", "tagged 1 backchannel segment(s)") {
+		t.Fatal("expected backchannel report event")
+	}
+}
+
+func TestMergeCoalescesAroundDifferentSpeakerBackchannel(t *testing.T) {
+	tmp := t.TempDir()
+	alicePath := writeJSONFile(t, tmp, "a.json", `{
+		"segments": [
+			{"start": 1, "end": 2, "text": "first"},
+			{"start": 3, "end": 4, "text": "second"}
+		]
+	}`)
+	bobPath := writeJSONFile(t, tmp, "b.json", `{
+		"segments": [
+			{"start": 2.2, "end": 2.5, "text": "yeah"}
+		]
+	}`)
+	speakerMap := writeYAMLFile(t, tmp, "speakers.yml", `match:
+  - speaker: Alice
+    match: ["a.json"]
+  - speaker: Bob
+    match: ["b.json"]
+`)
+	outputPath := filepath.Join(tmp, "merged.json")
+	// Bob's "yeah" falls between Alice's two segments on the timeline.
+	if err := executeMerge(
+		"--input-file", alicePath,
+		"--input-file", bobPath,
+		"--speakers", speakerMap,
+		"--output-file", outputPath,
+	); err != nil {
+		t.Fatalf("merge failed: %v", err)
+	}
+	// Expect Alice's coalesced segment first, then Bob's tagged segment.
+	var merged model.FinalTranscript
+	readJSON(t, outputPath, &merged)
+	if len(merged.Segments) != 2 {
+		t.Fatalf("segment count = %d, want 2", len(merged.Segments))
+	}
+	alice, bob := merged.Segments[0], merged.Segments[1]
+	if alice.Speaker != "Alice" || alice.Text != "first second" {
+		t.Fatalf("first segment = %#v, want coalesced Alice", alice)
+	}
+	if len(alice.Categories) != 0 {
+		t.Fatalf("coalesced segment categories = %v, want none", alice.Categories)
+	}
+	if bob.Speaker != "Bob" || !equalStrings(bob.Categories, []string{"backchannel"}) {
+		t.Fatalf("second segment = %#v, want Bob backchannel", bob)
+	}
+}
+
 func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
 	dir := t.TempDir()
 	input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{
diff --git a/internal/coalesce/coalesce.go b/internal/coalesce/coalesce.go
index dac65db..52c12f2 100644
--- a/internal/coalesce/coalesce.go
+++ b/internal/coalesce/coalesce.go
@@ -27,16 +27,24 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
 	coalescedID := 0
 
 	current := newRun(in.Segments[0])
+	pendingBackchannels := make([]model.Segment, 0)
 	for _, segment := range in.Segments[1:] {
 		if current.canMerge(segment, gap) {
 			current.add(segment)
 			continue
 		}
+		if segment.Speaker != current.speaker() && hasCategory(segment, "backchannel") {
+			pendingBackchannels = append(pendingBackchannels, segment)
+			continue
+		}
 
 		coalescedID = appendRun(&out, current, coalescedID, &summary)
+		out.Segments = append(out.Segments, pendingBackchannels...)
+		pendingBackchannels = pendingBackchannels[:0]
 		current = newRun(segment)
 	}
-	appendRun(&out, current, coalescedID, &summary)
+	coalescedID = appendRun(&out, current, coalescedID, &summary)
+	out.Segments = append(out.Segments, pendingBackchannels...)
 
 	return out, summary
 }
@@ -56,6 +64,10 @@ func (r run) canMerge(next model.Segment, gap float64) bool {
 	return current.Speaker == next.Speaker && next.Start-current.End <= gap
 }
 
+func (r run) speaker() string { // speaker returns the Speaker of the run's first segment; assumes r.segments is non-empty (TODO confirm newRun guarantees this).
+	return r.segments[0].Speaker
+}
+
 func (r *run) add(segment model.Segment) {
 	r.segments = append(r.segments, segment)
 }
@@ -116,3 +128,12 @@ func segmentRef(segment model.Segment) string {
 	}
 	return segment.Source
 }
+
+// hasCategory reports whether segment.Categories contains category.
+func hasCategory(segment model.Segment, category string) bool {
+	remaining := segment.Categories
+	for len(remaining) > 0 && remaining[0] != category {
+		remaining = remaining[1:]
+	}
+	return len(remaining) > 0
+}
diff --git a/internal/coalesce/coalesce_test.go b/internal/coalesce/coalesce_test.go
index 33cbda4..cff3ebf 100644
--- a/internal/coalesce/coalesce_test.go
+++ b/internal/coalesce/coalesce_test.go
@@ -137,6 +137,55 @@ func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
 	}
 }
 
+func TestApplyDropsBackchannelCategoryFromMergedSameSpeakerRun(t *testing.T) {
+	tagged := segment("a.json", 0, "Alice", 1, 2, "yeah")
+	tagged.Categories = []string{"backchannel"}
+	followUp := segment("a.json", 1, "Alice", 2.5, 3, "more")
+	// Same speaker, 0.5 apart, gap limit 3: both inputs should form one run.
+	result, _ := Apply(model.MergedTranscript{Segments: []model.Segment{tagged, followUp}}, 3)
+	if n := len(result.Segments); n != 1 {
+		t.Fatalf("segment count = %d, want 1", n)
+	}
+	if cats := result.Segments[0].Categories; cats != nil {
+		t.Fatalf("categories = %v, want nil", cats)
+	}
+}
+
+func TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker(t *testing.T) {
+	aliceBefore := segment("a.json", 0, "Alice", 1, 2, "first")
+	bobAck := segment("b.json", 0, "Bob", 2.2, 2.5, "yeah")
+	bobAck.Categories = []string{"backchannel"}
+	aliceAfter := segment("a.json", 1, "Alice", 3, 4, "second")
+	// Bob's tagged segment sits between Alice's two segments in input order.
+	result, stats := Apply(model.MergedTranscript{Segments: []model.Segment{aliceBefore, bobAck, aliceAfter}}, 3)
+	if stats.OriginalSegmentsMerged != 2 || stats.CoalescedSegments != 1 {
+		t.Fatalf("summary = %#v", stats)
+	}
+	if n := len(result.Segments); n != 2 {
+		t.Fatalf("segment count = %d, want 2", n)
+	}
+	if text := result.Segments[0].Text; text != "first second" {
+		t.Fatalf("first output text = %q, want first second", text)
+	}
+	if ack := result.Segments[1]; ack.Text != "yeah" || !reflect.DeepEqual(ack.Categories, []string{"backchannel"}) {
+		t.Fatalf("second output segment = %#v", ack)
+	}
+}
+
+func TestApplyDifferentSpeakerNonBackchannelStillBlocksMerge(t *testing.T) {
+	aliceBefore := segment("a.json", 0, "Alice", 1, 2, "first")
+	bobTurn := segment("b.json", 0, "Bob", 2.2, 2.5, "interruption")
+	aliceAfter := segment("a.json", 1, "Alice", 3, 4, "second")
+	// Bob's segment carries no category, so it must end Alice's run.
+	result, stats := Apply(model.MergedTranscript{Segments: []model.Segment{aliceBefore, bobTurn, aliceAfter}}, 3)
+	if stats.OriginalSegmentsMerged != 0 || stats.CoalescedSegments != 0 {
+		t.Fatalf("summary = %#v", stats)
+	}
+	if n := len(result.Segments); n != 3 {
+		t.Fatalf("segment count = %d, want 3", n)
+	}
+}
+
 func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment {
 	return model.Segment{
 		Source:             source,
diff --git a/internal/config/config.go b/internal/config/config.go
index 80818aa..4ebc641 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -14,7 +14,7 @@ const (
 	DefaultInputReader           = "json-files"
 	DefaultOutputModules         = "json"
 	DefaultPreprocessingModules  = "validate-raw,normalize-speakers,trim-text"
-	DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output"
+	DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,backchannel,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output"
 	DefaultOverlapWordRunGap     = 0.75
 	DefaultWordRunReorderWindow  = 0.4
 	DefaultCoalesceGap           = 3.0
diff --git a/internal/model/model.go b/internal/model/model.go
index 9c3c8f1..b129ec9 100644
--- a/internal/model/model.go
+++ b/internal/model/model.go
@@ -56,6 +56,7 @@ type Segment struct {
 	Start              float64  `json:"start"`
 	End                float64  `json:"end"`
 	Text               string   `json:"text"`
+	Categories         []string `json:"categories,omitempty"`
 	Words              []Word   `json:"words,omitempty"`
 	OverlapGroupID     int      `json:"overlap_group_id,omitempty"`
 }