package builtin

import (
	"context"
	"fmt"
	"path/filepath"
	"strings"

	"gitea.maximumdirect.net/eric/seriatim/internal/config"
	"gitea.maximumdirect.net/eric/seriatim/internal/model"
	"gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
	"gitea.maximumdirect.net/eric/seriatim/internal/report"
	"gitea.maximumdirect.net/eric/seriatim/internal/speaker"
)

// validateRaw is the "validate-raw" preprocessing module. Its Process method
// checks raw transcripts for basic structural problems and passes the state
// through unchanged. The type carries no fields.
type validateRaw struct{}

// Name returns the module identifier "validate-raw", the same string used in
// this module's error messages and report events.
func (validateRaw) Name() string {
	return "validate-raw"
}

// Requires returns pipeline.StateRaw, the model state that Process insists on
// receiving as input.
func (validateRaw) Requires() pipeline.ModelState {
	return pipeline.StateRaw
}

// Produces returns pipeline.StateRaw: validation does not change the model
// state, so the output state equals the required input state.
func (validateRaw) Produces() pipeline.ModelState {
	return pipeline.StateRaw
}

// Process checks every raw transcript in the incoming state and returns the
// state unchanged together with a single informational event.
//
// It fails, returning a zero PreprocessState and no events, when:
//   - ctx is already done (ctx.Err() is returned as-is),
//   - in.State is not pipeline.StateRaw,
//   - in.Raw is empty,
//   - a transcript's Source is empty or only whitespace,
//   - a segment has a negative Start or an End before its Start,
//   - a word flagged Timed has a negative Start or an End before its Start.
//
// Validation stops at the first problem found. cfg is not consulted.
func (validateRaw) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
	// reject builds the failure triple shared by every error path.
	reject := func(err error) (pipeline.PreprocessState, []report.Event, error) {
		return pipeline.PreprocessState{}, nil, err
	}

	if err := ctx.Err(); err != nil {
		return reject(err)
	}

	switch {
	case in.State != pipeline.StateRaw:
		return reject(fmt.Errorf("preprocessing module %q requires state %q but received %q", "validate-raw", pipeline.StateRaw, in.State))
	case len(in.Raw) == 0:
		return reject(fmt.Errorf("validate-raw: no raw transcript(s) to validate"))
	}

	for ti, t := range in.Raw {
		if strings.TrimSpace(t.Source) == "" {
			return reject(fmt.Errorf("validate-raw: raw transcript %d has empty source", ti))
		}

		for si, seg := range t.Segments {
			// Segment-level timing is checked before any of its words.
			switch {
			case seg.Start < 0:
				return reject(fmt.Errorf("validate-raw: raw transcript %q segment %d has negative start", t.Source, si))
			case seg.End < seg.Start:
				return reject(fmt.Errorf("validate-raw: raw transcript %q segment %d has end before start", t.Source, si))
			}

			for wi, w := range seg.Words {
				switch {
				case !w.Timed:
					// Words not flagged as timed are not checked.
				case w.Start < 0:
					return reject(fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has negative start", t.Source, si, wi))
				case w.End < w.Start:
					return reject(fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has end before start", t.Source, si, wi))
				}
			}
		}
	}

	summary := fmt.Sprintf("validated %d raw transcript(s)", len(in.Raw))
	events := []report.Event{
		report.Info("preprocessing", "validate-raw", summary),
	}
	return in, events, nil
}

// trimText is the "trim-text" preprocessing module. Its Process method strips
// leading and trailing whitespace from the text of every canonical segment.
// The type carries no fields.
type trimText struct{}

// Name returns the module identifier "trim-text", the same string used in this
// module's error messages and report events.
func (trimText) Name() string {
	return "trim-text"
}

// Requires returns pipeline.StateCanonical, the model state that Process
// insists on receiving as input.
func (trimText) Requires() pipeline.ModelState {
	return pipeline.StateCanonical
}

// Produces returns pipeline.StateCanonical: trimming does not change the model
// state, so the output state equals the required input state.
func (trimText) Produces() pipeline.ModelState {
	return pipeline.StateCanonical
}

// Process returns the incoming state with leading and trailing whitespace
// removed (via strings.TrimSpace) from the Text of every canonical segment,
// together with a single informational event.
//
// It fails, returning a zero PreprocessState and no events, when ctx is
// already done (ctx.Err() is returned as-is) or when in.State is not
// pipeline.StateCanonical. cfg is not consulted.
//
// Although in is passed by value, its Canonical slice and each transcript's
// Segments slice share backing arrays with the caller's copy. Both levels are
// therefore cloned before any text is rewritten, so the caller's state is not
// modified. All other data (including each segment's Words) is carried over
// as-is and is not altered here.
func (trimText) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
	if err := ctx.Err(); err != nil {
		return pipeline.PreprocessState{}, nil, err
	}
	if in.State != pipeline.StateCanonical {
		return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "trim-text", pipeline.StateCanonical, in.State)
	}

	// append(s[:0:0], s...) makes a shallow copy that keeps a nil slice nil and
	// an empty slice empty, so the output shape matches the input exactly.
	trimmed := append(in.Canonical[:0:0], in.Canonical...)
	for transcriptIndex := range trimmed {
		segments := append(trimmed[transcriptIndex].Segments[:0:0], trimmed[transcriptIndex].Segments...)
		for segmentIndex := range segments {
			segments[segmentIndex].Text = strings.TrimSpace(segments[segmentIndex].Text)
		}
		trimmed[transcriptIndex].Segments = segments
	}
	// in is a local copy of the struct, so this reassignment is invisible to the caller.
	in.Canonical = trimmed

	return in, []report.Event{
		report.Info("preprocessing", "trim-text", "trimmed canonical segment text"),
	}, nil
}

// normalizeSpeakers is the "normalize-speakers" preprocessing module. Its
// Process method builds canonical transcripts from raw ones and assigns a
// speaker label to every segment. The type carries no fields.
type normalizeSpeakers struct{}

// Name returns the module identifier "normalize-speakers", the same string
// used in this module's error messages and report events.
func (normalizeSpeakers) Name() string {
	return "normalize-speakers"
}

// Requires returns pipeline.StateRaw, the model state that Process insists on
// receiving as input.
func (normalizeSpeakers) Requires() pipeline.ModelState {
	return pipeline.StateRaw
}

// Produces returns pipeline.StateCanonical, the state that Process sets on the
// PreprocessState it returns on success.
func (normalizeSpeakers) Produces() pipeline.ModelState {
	return pipeline.StateCanonical
}

// Process converts every raw transcript into a canonical transcript and
// returns a new state in pipeline.StateCanonical with one informational event.
//
// Speaker labels come from one of two places:
//   - when cfg.SpeakersFile is non-empty, the speaker map loaded from that file
//     is asked for the speaker of each transcript's Source;
//   - otherwise the base name of the transcript's Source is used.
//
// Each canonical segment records its Source, its index within the raw
// transcript, the speaker label, and the raw Start, End and Text, plus a copy
// of the raw Words. The returned state carries a shallow copy of in.Raw.
//
// It fails, returning a zero PreprocessState and no events, when ctx is
// already done, when in.State is not pipeline.StateRaw, when the speaker map
// cannot be loaded, or when a speaker lookup fails; the underlying error is
// returned as-is in the last two cases.
func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
	if err := ctx.Err(); err != nil {
		return pipeline.PreprocessState{}, nil, err
	}
	if in.State != pipeline.StateRaw {
		return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "normalize-speakers", pipeline.StateRaw, in.State)
	}

	// The speaker map is only loaded when a speakers file is configured.
	var lookup speaker.Map
	mapped := cfg.SpeakersFile != ""
	if mapped {
		loaded, err := speaker.LoadMap(cfg.SpeakersFile)
		if err != nil {
			return pipeline.PreprocessState{}, nil, err
		}
		lookup = loaded
	}

	// labelFor resolves the speaker label for a single transcript source.
	labelFor := func(source string) (string, error) {
		if !mapped {
			return filepath.Base(source), nil
		}
		return lookup.SpeakerForSource(source)
	}

	out := make([]model.CanonicalTranscript, 0, len(in.Raw))
	for _, transcript := range in.Raw {
		label, err := labelFor(transcript.Source)
		if err != nil {
			return pipeline.PreprocessState{}, nil, err
		}

		converted := make([]model.Segment, len(transcript.Segments))
		for i, seg := range transcript.Segments {
			// Each segment needs its own int to point at, so take a fresh copy
			// of the index on every iteration.
			position := i

			// Copy the words so the canonical segment does not share a backing
			// array with the raw one; stays nil when there are no words.
			var words []model.Word
			words = append(words, seg.Words...)

			converted[i] = model.Segment{
				Source:             transcript.Source,
				SourceSegmentIndex: &position,
				Speaker:            label,
				Start:              seg.Start,
				End:                seg.End,
				Text:               seg.Text,
				Words:              words,
			}
		}

		out = append(out, model.CanonicalTranscript{
			Source:   transcript.Source,
			Segments: converted,
		})
	}

	var message string
	switch {
	case mapped:
		message = "created canonical transcript(s) from raw input"
	default:
		message = "created canonical transcript(s) using input basenames as speaker labels"
	}

	var rawCopy []model.RawTranscript
	rawCopy = append(rawCopy, in.Raw...)

	next := pipeline.PreprocessState{
		State:     pipeline.StateCanonical,
		Raw:       rawCopy,
		Canonical: out,
	}
	events := []report.Event{
		report.Info("preprocessing", "normalize-speakers", message),
	}
	return next, events, nil
}