Updated DefaultWordRunReorderWindow to 1.0
This commit is contained in:
@@ -171,7 +171,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
|
||||
resolved := resolvedGroup{}
|
||||
for speakerIndex, speaker := range speakers {
|
||||
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], expandedStart, expandedEnd)
|
||||
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker])
|
||||
if len(timedWords) == 0 {
|
||||
continue
|
||||
}
|
||||
@@ -303,7 +303,7 @@ func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]
|
||||
return speakers
|
||||
}
|
||||
|
||||
func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEnd float64) ([]resolutionWord, []resolutionWord) {
|
||||
func gatherResolutionWords(segments []model.Segment) ([]resolutionWord, []resolutionWord) {
|
||||
timedWords := make([]resolutionWord, 0)
|
||||
untimedWords := make([]resolutionWord, 0)
|
||||
sequence := 0
|
||||
@@ -321,9 +321,6 @@ func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEn
|
||||
untimedWords = append(untimedWords, candidate)
|
||||
continue
|
||||
}
|
||||
if word.End <= groupStart || word.Start >= groupEnd {
|
||||
continue
|
||||
}
|
||||
timedWords = append(timedWords, candidate)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
func TestResolvePreservesAllWordsFromSelectedOverlapSegment(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
@@ -101,11 +101,98 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "left-edge inside right-edge" {
|
||||
if got.Segments[0].Text != "before left-edge inside right-edge after" {
|
||||
t.Fatalf("text = %q", got.Segments[0].Text)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolvePreservesPrefixWordsFromAbsorbedContextSegment builds a transcript
// with two Alice segments from "a.json" (indexes 0 and 1) and one Bob segment
// from "b.json", plus a single overlap group whose refs name only "a.json#1"
// and "b.json#0". It then calls Resolve and requires gotTexts to report
// "prefix near inside,bob": the words "prefix" and "near" from the Alice
// segment that the group does not reference must appear before "inside".
//
// NOTE(review): segmentWithWords is presumably (source, index, speaker, start,
// end, words...) and group is presumably (id, start, end, refs, speakers) —
// confirm against the helpers. The meaning of Resolve's numeric arguments
// (10, 0.4, 3) is not visible from this test; confirm against its signature.
func TestResolvePreservesPrefixWordsFromAbsorbedContextSegment(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
7,
|
||||
9.5,
|
||||
word("prefix", 7.2, 7.4),
|
||||
word("near", 9.2, 9.4),
|
||||
),
|
||||
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
||||
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 10, 0.4, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
// The expected string has two comma-separated entries: Alice's words first, then Bob's.
if gotTexts(got.Segments) != "prefix near inside,bob" {
|
||||
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolvePreservesSuffixWordsFromAbsorbedContextSegment builds a transcript
// with two Alice segments from "a.json" (indexes 0 and 1) and one Bob segment
// from "b.json", plus a single overlap group whose refs name only "a.json#0"
// and "b.json#0". It then calls Resolve and requires gotTexts to report
// "bob,inside near suffix": the words "near" and "suffix" from the Alice
// segment that the group does not reference must appear after "inside".
//
// NOTE(review): segmentWithWords is presumably (source, index, speaker, start,
// end, words...) and group is presumably (id, start, end, refs, speakers) —
// confirm against the helpers. The meaning of Resolve's numeric arguments
// (10, 0.4, 3) is not visible from this test; confirm against its signature.
func TestResolvePreservesSuffixWordsFromAbsorbedContextSegment(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
1,
|
||||
"Alice",
|
||||
12.5,
|
||||
16,
|
||||
word("near", 12.7, 12.9),
|
||||
word("suffix", 15.6, 15.8),
|
||||
),
|
||||
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 10, 12, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 10, 0.4, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
// The expected string has two comma-separated entries: Bob's word first, then Alice's.
if gotTexts(got.Segments) != "bob,inside near suffix" {
|
||||
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveOutOfWindowWordsAffectWordRunBoundaries uses the same segment
// layout and overlap group as
// TestResolvePreservesPrefixWordsFromAbsorbedContextSegment (the first word is
// named "early" here), but passes 0.75 rather than 10 as the second argument
// to Resolve. It requires gotTexts to report "early,near,bob,inside", i.e.
// four comma-separated entries instead of two.
//
// NOTE(review): the second Resolve argument is presumably the word-run reorder
// window that decides where runs are split — confirm against Resolve's
// signature; this test only shows that changing it changes the grouping.
func TestResolveOutOfWindowWordsAffectWordRunBoundaries(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
7,
|
||||
9.5,
|
||||
word("early", 7.2, 7.4),
|
||||
word("near", 9.2, 9.4),
|
||||
),
|
||||
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
||||
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
// Each of Alice's three words and Bob's one word is expected as its own entry.
if gotTexts(got.Segments) != "early,near,bob,inside" {
|
||||
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesContextWordsAroundOverlapWindow(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
|
||||
Reference in New Issue
Block a user