Bugfix in the coalesce module
This commit is contained in:
@@ -263,7 +263,7 @@ The default pipeline runs `coalesce` before the second overlap detection pass. I
|
|||||||
|
|
||||||
Coalesced segments use `source_ref` values such as `coalesce:1`, include `derived_from`, and omit `source_segment_index`.
|
Coalesced segments use `source_ref` values such as `coalesce:1`, include `derived_from`, and omit `source_segment_index`.
|
||||||
|
|
||||||
Different-speaker backchannel and filler segments do not block coalescing of surrounding same-speaker segments. When same-speaker segments are coalesced, any `backchannel` or `filler` category from the merged inputs is dropped from the coalesced segment.
|
Different-speaker backchannel and filler segments do not block coalescing of surrounding same-speaker segments. Same-speaker backchannel and filler segments are merged normally when they are within `--coalesce-gap`. When same-speaker segments are coalesced, any `backchannel` or `filler` category from the merged inputs is dropped from the coalesced segment.
|
||||||
|
|
||||||
## Autocorrect
|
## Autocorrect
|
||||||
|
|
||||||
|
|||||||
@@ -665,6 +665,94 @@ func TestMergeTagsFillerSegments(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMergeCoalescesSameSpeakerBackchannelWithFollowingSegment(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
input := writeJSONFile(t, dir, "zach.json", `{
|
||||||
|
"segments": [
|
||||||
|
{"start": 1, "end": 1.7, "text": "That makes sense."},
|
||||||
|
{"start": 1.72, "end": 4, "text": "So, like, next thought."}
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
|
err := executeMerge(
|
||||||
|
"--input-file", input,
|
||||||
|
"--output-file", output,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("merge failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var transcript model.FinalTranscript
|
||||||
|
readJSON(t, output, &transcript)
|
||||||
|
if len(transcript.Segments) != 1 {
|
||||||
|
t.Fatalf("segment count = %d, want 1", len(transcript.Segments))
|
||||||
|
}
|
||||||
|
segment := transcript.Segments[0]
|
||||||
|
if segment.Text != "That makes sense. So, like, next thought." {
|
||||||
|
t.Fatalf("text = %q", segment.Text)
|
||||||
|
}
|
||||||
|
if segment.SourceRef != "coalesce:1" {
|
||||||
|
t.Fatalf("source_ref = %q, want coalesce:1", segment.SourceRef)
|
||||||
|
}
|
||||||
|
if !equalStrings(segment.DerivedFrom, []string{input + "#0", input + "#1"}) {
|
||||||
|
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||||
|
}
|
||||||
|
if len(segment.Categories) != 0 {
|
||||||
|
t.Fatalf("categories = %v, want none", segment.Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeCoalescesBackchannelAfterDifferentSpeakerIntoFollowingSameSpeakerSegment(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
inputA := writeJSONFile(t, dir, "mike.json", `{
|
||||||
|
"segments": [
|
||||||
|
{"start": 1, "end": 2, "text": "previous speaker"}
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
inputB := writeJSONFile(t, dir, "zach.json", `{
|
||||||
|
"segments": [
|
||||||
|
{"start": 2.5, "end": 3, "text": "That makes sense."},
|
||||||
|
{"start": 3.02, "end": 6, "text": "So, like, next thought."}
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
|
- speaker: Mike
|
||||||
|
match: ["mike.json"]
|
||||||
|
- speaker: Zach
|
||||||
|
match: ["zach.json"]
|
||||||
|
`)
|
||||||
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
|
err := executeMerge(
|
||||||
|
"--input-file", inputA,
|
||||||
|
"--input-file", inputB,
|
||||||
|
"--speakers", speakers,
|
||||||
|
"--output-file", output,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("merge failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var transcript model.FinalTranscript
|
||||||
|
readJSON(t, output, &transcript)
|
||||||
|
if len(transcript.Segments) != 2 {
|
||||||
|
t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
|
||||||
|
}
|
||||||
|
if transcript.Segments[0].Speaker != "Mike" || transcript.Segments[0].Text != "previous speaker" {
|
||||||
|
t.Fatalf("first segment = %#v, want Mike original", transcript.Segments[0])
|
||||||
|
}
|
||||||
|
if transcript.Segments[1].Speaker != "Zach" || transcript.Segments[1].Text != "That makes sense. So, like, next thought." {
|
||||||
|
t.Fatalf("second segment = %#v, want coalesced Zach", transcript.Segments[1])
|
||||||
|
}
|
||||||
|
if !equalStrings(transcript.Segments[1].DerivedFrom, []string{inputB + "#0", inputB + "#1"}) {
|
||||||
|
t.Fatalf("derived_from = %v", transcript.Segments[1].DerivedFrom)
|
||||||
|
}
|
||||||
|
if len(transcript.Segments[1].Categories) != 0 {
|
||||||
|
t.Fatalf("categories = %v, want none", transcript.Segments[1].Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestMergeCoalescesAroundDifferentSpeakerBackchannel(t *testing.T) {
|
func TestMergeCoalescesAroundDifferentSpeakerBackchannel(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||||
|
|||||||
@@ -29,16 +29,22 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
|
|||||||
current := newRun(in.Segments[0])
|
current := newRun(in.Segments[0])
|
||||||
pendingSkipped := make([]model.Segment, 0)
|
pendingSkipped := make([]model.Segment, 0)
|
||||||
for _, segment := range in.Segments[1:] {
|
for _, segment := range in.Segments[1:] {
|
||||||
if current.canMerge(segment, gap) {
|
if segment.Speaker == current.speaker() {
|
||||||
current.add(segment)
|
if current.canMerge(segment, gap) {
|
||||||
continue
|
current.add(segment)
|
||||||
}
|
continue
|
||||||
if segment.Speaker != current.speaker() && hasAnyCategory(segment, "backchannel", "filler") {
|
}
|
||||||
|
} else if isSkippableInterjection(segment) {
|
||||||
pendingSkipped = append(pendingSkipped, segment)
|
pendingSkipped = append(pendingSkipped, segment)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
||||||
|
if seeded, remaining, ok := seedRunFromPending(pendingSkipped, segment, gap); ok {
|
||||||
|
pendingSkipped = remaining
|
||||||
|
current = seeded
|
||||||
|
continue
|
||||||
|
}
|
||||||
out.Segments = append(out.Segments, pendingSkipped...)
|
out.Segments = append(out.Segments, pendingSkipped...)
|
||||||
pendingSkipped = pendingSkipped[:0]
|
pendingSkipped = pendingSkipped[:0]
|
||||||
current = newRun(segment)
|
current = newRun(segment)
|
||||||
@@ -72,6 +78,37 @@ func (r *run) add(segment model.Segment) {
|
|||||||
r.segments = append(r.segments, segment)
|
r.segments = append(r.segments, segment)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func seedRunFromPending(pending []model.Segment, segment model.Segment, gap float64) (run, []model.Segment, bool) {
|
||||||
|
for start := range pending {
|
||||||
|
if pending[start].Speaker != segment.Speaker {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
candidate := newRun(pending[start])
|
||||||
|
selected := map[int]struct{}{start: {}}
|
||||||
|
for index := start + 1; index < len(pending); index++ {
|
||||||
|
if pending[index].Speaker == segment.Speaker && candidate.canMerge(pending[index], gap) {
|
||||||
|
candidate.add(pending[index])
|
||||||
|
selected[index] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !candidate.canMerge(segment, gap) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
candidate.add(segment)
|
||||||
|
remaining := make([]model.Segment, 0, len(pending)-len(selected))
|
||||||
|
for index, skipped := range pending {
|
||||||
|
if _, ok := selected[index]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
remaining = append(remaining, skipped)
|
||||||
|
}
|
||||||
|
return candidate, remaining, true
|
||||||
|
}
|
||||||
|
return run{}, pending, false
|
||||||
|
}
|
||||||
|
|
||||||
func appendRun(out *model.MergedTranscript, current run, coalescedID int, summary *Summary) int {
|
func appendRun(out *model.MergedTranscript, current run, coalescedID int, summary *Summary) int {
|
||||||
if len(current.segments) == 1 {
|
if len(current.segments) == 1 {
|
||||||
out.Segments = append(out.Segments, current.segments[0])
|
out.Segments = append(out.Segments, current.segments[0])
|
||||||
@@ -129,6 +166,10 @@ func segmentRef(segment model.Segment) string {
|
|||||||
return segment.Source
|
return segment.Source
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isSkippableInterjection(segment model.Segment) bool {
|
||||||
|
return hasAnyCategory(segment, "backchannel", "filler")
|
||||||
|
}
|
||||||
|
|
||||||
func hasAnyCategory(segment model.Segment, categories ...string) bool {
|
func hasAnyCategory(segment model.Segment, categories ...string) bool {
|
||||||
for _, existing := range segment.Categories {
|
for _, existing := range segment.Categories {
|
||||||
for _, category := range categories {
|
for _, category := range categories {
|
||||||
|
|||||||
@@ -165,6 +165,119 @@ func TestApplyDropsFillerCategoryFromMergedSameSpeakerRun(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyMergesSameSpeakerBackchannelIntoDerivedRun(t *testing.T) {
|
||||||
|
first := segment("zach.json", 110, "Zach", 7811.778, 7812.478, "That makes sense.")
|
||||||
|
first.Categories = []string{"backchannel"}
|
||||||
|
second := model.Segment{
|
||||||
|
Source: "zach.json",
|
||||||
|
SourceRef: "coalesce:347",
|
||||||
|
DerivedFrom: []string{"zach.json#111", "zach.json#112"},
|
||||||
|
Speaker: "Zach",
|
||||||
|
Start: 7812.498,
|
||||||
|
End: 7824.045,
|
||||||
|
Text: "So, like, I'm above the silence field.",
|
||||||
|
}
|
||||||
|
|
||||||
|
got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
|
||||||
|
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||||
|
t.Fatalf("summary = %#v", summary)
|
||||||
|
}
|
||||||
|
if len(got.Segments) != 1 {
|
||||||
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||||
|
}
|
||||||
|
if got.Segments[0].Text != "That makes sense. So, like, I'm above the silence field." {
|
||||||
|
t.Fatalf("text = %q", got.Segments[0].Text)
|
||||||
|
}
|
||||||
|
if got.Segments[0].SourceRef != "coalesce:1" {
|
||||||
|
t.Fatalf("source_ref = %q, want coalesce:1", got.Segments[0].SourceRef)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got.Segments[0].DerivedFrom, []string{"zach.json#110", "coalesce:347"}) {
|
||||||
|
t.Fatalf("derived_from = %v", got.Segments[0].DerivedFrom)
|
||||||
|
}
|
||||||
|
if got.Segments[0].Categories != nil {
|
||||||
|
t.Fatalf("categories = %v, want nil", got.Segments[0].Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyMergesSameSpeakerFillerIntoDerivedRun(t *testing.T) {
|
||||||
|
first := segment("zach.json", 110, "Zach", 1, 1.5, "um")
|
||||||
|
first.Categories = []string{"filler"}
|
||||||
|
second := model.Segment{
|
||||||
|
Source: "zach.json",
|
||||||
|
SourceRef: "coalesce:12",
|
||||||
|
DerivedFrom: []string{"zach.json#111", "zach.json#112"},
|
||||||
|
Speaker: "Zach",
|
||||||
|
Start: 1.6,
|
||||||
|
End: 4,
|
||||||
|
Text: "next thought",
|
||||||
|
}
|
||||||
|
|
||||||
|
got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
|
||||||
|
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||||
|
t.Fatalf("summary = %#v", summary)
|
||||||
|
}
|
||||||
|
if len(got.Segments) != 1 {
|
||||||
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||||
|
}
|
||||||
|
if got.Segments[0].Text != "um next thought" {
|
||||||
|
t.Fatalf("text = %q", got.Segments[0].Text)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got.Segments[0].DerivedFrom, []string{"zach.json#110", "coalesce:12"}) {
|
||||||
|
t.Fatalf("derived_from = %v", got.Segments[0].DerivedFrom)
|
||||||
|
}
|
||||||
|
if got.Segments[0].Categories != nil {
|
||||||
|
t.Fatalf("categories = %v, want nil", got.Segments[0].Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyUsesSkippedBackchannelToSeedNextSameSpeakerRun(t *testing.T) {
|
||||||
|
mike := segment("mike.json", 367, "Mike", 7803.57, 7810.719, "It's very easy to notice.")
|
||||||
|
backchannel := segment("zach.json", 110, "Zach", 7811.778, 7812.478, "That makes sense.")
|
||||||
|
backchannel.Categories = []string{"backchannel"}
|
||||||
|
next := segment("zach.json", 111, "Zach", 7812.498, 7820, "So, like, next thought.")
|
||||||
|
|
||||||
|
got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{mike, backchannel, next}}, 3)
|
||||||
|
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||||
|
t.Fatalf("summary = %#v", summary)
|
||||||
|
}
|
||||||
|
if len(got.Segments) != 2 {
|
||||||
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||||
|
}
|
||||||
|
if got.Segments[0].Text != "It's very easy to notice." {
|
||||||
|
t.Fatalf("first text = %q", got.Segments[0].Text)
|
||||||
|
}
|
||||||
|
if got.Segments[1].Speaker != "Zach" || got.Segments[1].Text != "That makes sense. So, like, next thought." {
|
||||||
|
t.Fatalf("second segment = %#v", got.Segments[1])
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got.Segments[1].DerivedFrom, []string{"zach.json#110", "zach.json#111"}) {
|
||||||
|
t.Fatalf("derived_from = %v", got.Segments[1].DerivedFrom)
|
||||||
|
}
|
||||||
|
if got.Segments[1].Categories != nil {
|
||||||
|
t.Fatalf("categories = %v, want nil", got.Segments[1].Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyUsesSkippedFillerToSeedNextSameSpeakerRun(t *testing.T) {
|
||||||
|
alice := segment("alice.json", 0, "Alice", 1, 2, "first")
|
||||||
|
filler := segment("bob.json", 0, "Bob", 2.1, 2.3, "um")
|
||||||
|
filler.Categories = []string{"filler"}
|
||||||
|
bob := segment("bob.json", 1, "Bob", 2.4, 4, "actual thought")
|
||||||
|
|
||||||
|
got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{alice, filler, bob}}, 3)
|
||||||
|
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||||
|
t.Fatalf("summary = %#v", summary)
|
||||||
|
}
|
||||||
|
if len(got.Segments) != 2 {
|
||||||
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||||
|
}
|
||||||
|
if got.Segments[1].Text != "um actual thought" {
|
||||||
|
t.Fatalf("second text = %q", got.Segments[1].Text)
|
||||||
|
}
|
||||||
|
if got.Segments[1].Categories != nil {
|
||||||
|
t.Fatalf("categories = %v, want nil", got.Segments[1].Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker(t *testing.T) {
|
func TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker(t *testing.T) {
|
||||||
first := segment("a.json", 0, "Alice", 1, 2, "first")
|
first := segment("a.json", 0, "Alice", 1, 2, "first")
|
||||||
backchannel := segment("b.json", 0, "Bob", 2.2, 2.5, "yeah")
|
backchannel := segment("b.json", 0, "Bob", 2.2, 2.5, "yeah")
|
||||||
|
|||||||
Reference in New Issue
Block a user