All checks were successful
ci/woodpecker/tag/release Pipeline was successful
203 lines
7.0 KiB
Go
203 lines
7.0 KiB
Go
package danglers
|
|
|
|
import (
|
|
"reflect"
|
|
"testing"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
func TestApplyMergesDanglingEndIntoNearestPriorSharedDerivedFrom(t *testing.T) {
|
|
got, summary := Apply(transcript(
|
|
segment("a", "Alice", 1, 2, "target", []string{"source#1"}),
|
|
segment("b", "Bob", 2, 3, "middle", []string{"other#1"}),
|
|
segment("a", "Alice", 3, 4, "end.", []string{"source#1"}),
|
|
))
|
|
|
|
if summary.DanglersMerged != 1 || summary.TargetsChanged != 1 {
|
|
t.Fatalf("summary = %#v", summary)
|
|
}
|
|
if len(got.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
|
}
|
|
assertSegment(t, got.Segments[0], "resolve-danglers:1", "target end.", 1, 4, []string{"source#1"})
|
|
if got.Segments[0].SourceSegmentIndex != nil || got.Segments[0].OverlapGroupID != 0 || got.Segments[0].ID != 0 {
|
|
t.Fatalf("stale fields not cleared: %#v", got.Segments[0])
|
|
}
|
|
}
|
|
|
|
func TestApplyMergesDanglingStartIntoNearestSubsequentSharedDerivedFrom(t *testing.T) {
|
|
got, summary := Apply(transcript(
|
|
segment("a", "Alice", 1, 2, "start", []string{"source#1"}),
|
|
segment("b", "Bob", 2, 3, "middle", []string{"other#1"}),
|
|
segment("a", "Alice", 3, 4, "target", []string{"source#1"}),
|
|
))
|
|
|
|
if summary.DanglersMerged != 1 || summary.TargetsChanged != 1 {
|
|
t.Fatalf("summary = %#v", summary)
|
|
}
|
|
if len(got.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
|
}
|
|
assertSegment(t, got.Segments[1], "resolve-danglers:1", "start target", 1, 4, []string{"source#1"})
|
|
}
|
|
|
|
func TestApplyUsesAnyDerivedFromIntersection(t *testing.T) {
|
|
got, _ := Apply(transcript(
|
|
segment("a", "Alice", 1, 2, "target", []string{"source#1", "source#2"}),
|
|
segment("a", "Alice", 3, 4, "end.", []string{"source#2", "source#3"}),
|
|
))
|
|
|
|
assertSegment(t, got.Segments[0], "resolve-danglers:1", "target end.", 1, 4, []string{"source#1", "source#2", "source#3"})
|
|
}
|
|
|
|
func TestApplyDoesNotMergeDanglersAcrossSpeakersBackward(t *testing.T) {
|
|
in := transcript(
|
|
segment("a", "Alice", 1, 2, "target", []string{"source#1"}),
|
|
segment("b", "Bob", 3, 4, "end.", []string{"source#1"}),
|
|
)
|
|
|
|
got, summary := Apply(in)
|
|
if summary.DanglersMerged != 0 || !reflect.DeepEqual(got, in) {
|
|
t.Fatalf("unexpected merge:\ngot %#v\nwant %#v", got, in)
|
|
}
|
|
}
|
|
|
|
func TestApplyDoesNotMergeDanglersAcrossSpeakersForward(t *testing.T) {
|
|
in := transcript(
|
|
segment("a", "Alice", 1, 2, "start", []string{"source#1"}),
|
|
segment("b", "Bob", 3, 4, "target", []string{"source#1"}),
|
|
)
|
|
|
|
got, summary := Apply(in)
|
|
if summary.DanglersMerged != 0 || !reflect.DeepEqual(got, in) {
|
|
t.Fatalf("unexpected merge:\ngot %#v\nwant %#v", got, in)
|
|
}
|
|
}
|
|
|
|
func TestApplyDoesNotMergeWithoutSharedProvenance(t *testing.T) {
|
|
in := transcript(
|
|
segment("a", "Alice", 1, 2, "target", []string{"source#1"}),
|
|
segment("a", "Alice", 3, 4, "end.", []string{"source#2"}),
|
|
)
|
|
|
|
got, summary := Apply(in)
|
|
if summary.DanglersMerged != 0 || !reflect.DeepEqual(got, in) {
|
|
t.Fatalf("unexpected merge:\ngot %#v\nwant %#v", got, in)
|
|
}
|
|
}
|
|
|
|
func TestApplyDoesNotMergeLongDanglers(t *testing.T) {
|
|
in := transcript(
|
|
segment("a", "Alice", 1, 2, "target words here", []string{"source#1"}),
|
|
segment("a", "Alice", 3, 4, "three word end.", []string{"source#1"}),
|
|
)
|
|
|
|
got, summary := Apply(in)
|
|
if summary.DanglersMerged != 0 || !reflect.DeepEqual(got, in) {
|
|
t.Fatalf("unexpected merge:\ngot %#v\nwant %#v", got, in)
|
|
}
|
|
}
|
|
|
|
func TestApplyDanglingEndRequiresPunctuation(t *testing.T) {
|
|
in := transcript(
|
|
segment("a", "Alice", 1, 2, "target", []string{"source#1"}),
|
|
segment("a", "Alice", 3, 4, "end", []string{"source#1"}),
|
|
)
|
|
|
|
resolved, _ := resolveDanglingEnds(in)
|
|
if !reflect.DeepEqual(resolved, in) {
|
|
t.Fatalf("punctuation-free end should not merge backward:\ngot %#v\nwant %#v", resolved, in)
|
|
}
|
|
}
|
|
|
|
func TestApplyDanglingStartDoesNotRequirePunctuation(t *testing.T) {
|
|
got, summary := Apply(transcript(
|
|
segment("a", "Alice", 1, 2, "start", []string{"source#1"}),
|
|
segment("a", "Alice", 3, 4, "target words", []string{"source#1"}),
|
|
))
|
|
|
|
if summary.DanglersMerged != 1 {
|
|
t.Fatalf("summary = %#v", summary)
|
|
}
|
|
assertSegment(t, got.Segments[0], "resolve-danglers:1", "start target words", 1, 4, []string{"source#1"})
|
|
}
|
|
|
|
func TestApplyMergesMultipleDanglersIntoOneTarget(t *testing.T) {
|
|
got, summary := Apply(transcript(
|
|
segment("a", "Alice", 1, 2, "prefix", []string{"source#1"}),
|
|
segment("a", "Alice", 3, 4, "target", []string{"source#1"}),
|
|
segment("a", "Alice", 5, 6, "tail.", []string{"source#1"}),
|
|
))
|
|
|
|
if summary.DanglersMerged != 2 || summary.TargetsChanged != 1 {
|
|
t.Fatalf("summary = %#v", summary)
|
|
}
|
|
if len(got.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
|
}
|
|
assertSegment(t, got.Segments[0], "resolve-danglers:1", "prefix target tail.", 1, 6, []string{"source#1"})
|
|
}
|
|
|
|
func TestApplyMergedSegmentShape(t *testing.T) {
|
|
sourceIndex := 1
|
|
target := segment("a", "Alice", 2, 3, "target", []string{"a#1"})
|
|
target.ID = 99
|
|
target.SourceSegmentIndex = &sourceIndex
|
|
target.OverlapGroupID = 7
|
|
target.Categories = []string{"manual"}
|
|
target.Words = []model.Word{{Text: "target", Start: 2, End: 3, Timed: true}}
|
|
|
|
dangler := segment("b", "Alice", 1, 1.5, "start", []string{"a#1", "b#2"})
|
|
dangler.Categories = []string{"dangler"}
|
|
dangler.Words = []model.Word{{Text: "start", Start: 1, End: 1.5, Timed: true}}
|
|
|
|
got, _ := Apply(transcript(dangler, target))
|
|
merged := got.Segments[0]
|
|
if merged.Source != "derived" {
|
|
t.Fatalf("source = %q, want derived", merged.Source)
|
|
}
|
|
if !reflect.DeepEqual(merged.Categories, []string{"manual"}) {
|
|
t.Fatalf("categories = %v, want target categories only", merged.Categories)
|
|
}
|
|
if gotWords := []string{merged.Words[0].Text, merged.Words[1].Text}; !reflect.DeepEqual(gotWords, []string{"start", "target"}) {
|
|
t.Fatalf("word order = %v", gotWords)
|
|
}
|
|
assertSegment(t, merged, "resolve-danglers:1", "start target", 1, 3, []string{"a#1", "b#2"})
|
|
if merged.ID != 0 || merged.SourceSegmentIndex != nil || merged.OverlapGroupID != 0 {
|
|
t.Fatalf("stale fields not cleared: %#v", merged)
|
|
}
|
|
}
|
|
|
|
func transcript(segments ...model.Segment) model.MergedTranscript {
|
|
return model.MergedTranscript{Segments: segments}
|
|
}
|
|
|
|
func segment(source string, speaker string, start float64, end float64, text string, derivedFrom []string) model.Segment {
|
|
return model.Segment{
|
|
Source: source,
|
|
SourceRef: source + "-ref",
|
|
DerivedFrom: append([]string(nil), derivedFrom...),
|
|
Speaker: speaker,
|
|
Start: start,
|
|
End: end,
|
|
Text: text,
|
|
}
|
|
}
|
|
|
|
func assertSegment(t *testing.T, segment model.Segment, sourceRef string, text string, start float64, end float64, derivedFrom []string) {
|
|
t.Helper()
|
|
if segment.SourceRef != sourceRef {
|
|
t.Fatalf("source_ref = %q, want %q", segment.SourceRef, sourceRef)
|
|
}
|
|
if segment.Text != text {
|
|
t.Fatalf("text = %q, want %q", segment.Text, text)
|
|
}
|
|
if segment.Start != start || segment.End != end {
|
|
t.Fatalf("bounds = %f-%f, want %f-%f", segment.Start, segment.End, start, end)
|
|
}
|
|
if !reflect.DeepEqual(segment.DerivedFrom, derivedFrom) {
|
|
t.Fatalf("derived_from = %v, want %v", segment.DerivedFrom, derivedFrom)
|
|
}
|
|
}
|