package nws import ( "fmt" "html" "regexp" "strconv" "strings" "time" ) type ForecastDiscussion struct { OfficeID string OfficeName string Product string IssuedAt time.Time UpdatedAt *time.Time KeyMessages []string ShortTerm *ForecastDiscussionSection LongTerm *ForecastDiscussionSection } type ForecastDiscussionSection struct { Qualifier string IssuedAt *time.Time Text string } var ( forecastDiscussionHeaderRE = regexp.MustCompile(`^\.(KEY MESSAGES|SHORT TERM|LONG TERM|AVIATION)\.\.\.(.*)$`) forecastDiscussionAFDRE = regexp.MustCompile(`^AFD([A-Z]{3})$`) forecastDiscussionWMORE = regexp.MustCompile(`\bK([A-Z]{3})\b`) forecastDiscussionSigRE = regexp.MustCompile(`^[A-Z]{2,6}$`) ) func ParseForecastDiscussionHTML(raw string) (ForecastDiscussion, error) { text, err := ExtractForecastDiscussionText(raw) if err != nil { return ForecastDiscussion{}, err } parsed, err := ParseForecastDiscussionText(text) if err != nil { return ForecastDiscussion{}, err } parsed.UpdatedAt = parseForecastDiscussionUpdatedAt(raw) return parsed, nil } func ExtractForecastDiscussionText(raw string) (string, error) { lower := strings.ToLower(raw) searchFrom := 0 for { openStart := strings.Index(lower[searchFrom:], " block") } openStart += searchFrom openEnd := strings.Index(lower[openStart:], ">") if openEnd < 0 { return "", fmt.Errorf("unterminated
 tag")
		}
		openEnd += openStart

		tag := lower[openStart : openEnd+1]
		if isGlossaryProductTag(tag) {
			closeStart := strings.Index(lower[openEnd+1:], "
") if closeStart < 0 { return "", fmt.Errorf("missing closing for glossaryProduct block") } closeStart += openEnd + 1 text := html.UnescapeString(raw[openEnd+1 : closeStart]) text = strings.ReplaceAll(text, "\r\n", "\n") text = strings.ReplaceAll(text, "\r", "\n") return text, nil } searchFrom = openEnd + 1 } } func ParseForecastDiscussionText(text string) (ForecastDiscussion, error) { lines := splitLines(text) officeID := parseForecastDiscussionOfficeID(lines) officeName, issuedAt, err := parseForecastDiscussionHeader(lines) if err != nil { return ForecastDiscussion{}, err } out := ForecastDiscussion{ OfficeID: officeID, OfficeName: officeName, Product: "afd", IssuedAt: issuedAt.UTC(), } if block, ok := extractForecastDiscussionSection(lines, "KEY MESSAGES"); ok { out.KeyMessages = parseForecastDiscussionKeyMessages(block) } if block, ok := extractForecastDiscussionSection(lines, "SHORT TERM"); ok { section, err := parseForecastDiscussionTextSection(block) if err != nil { return ForecastDiscussion{}, fmt.Errorf("parse SHORT TERM: %w", err) } out.ShortTerm = §ion } if block, ok := extractForecastDiscussionSection(lines, "LONG TERM"); ok { section, err := parseForecastDiscussionTextSection(block) if err != nil { return ForecastDiscussion{}, fmt.Errorf("parse LONG TERM: %w", err) } out.LongTerm = §ion } return out, nil } func isGlossaryProductTag(tag string) bool { tag = strings.ToLower(tag) return strings.Contains(tag, `class="glossaryproduct"`) || strings.Contains(tag, `class='glossaryproduct'`) || strings.Contains(tag, `class="glossaryproduct `) || strings.Contains(tag, `class='glossaryproduct `) } func parseForecastDiscussionUpdatedAt(raw string) *time.Time { lower := strings.ToLower(raw) searchFrom := 0 for { metaStart := strings.Index(lower[searchFrom:], "") if metaEnd < 0 { return nil } metaEnd += metaStart tag := raw[metaStart : metaEnd+1] if !strings.EqualFold(strings.TrimSpace(extractHTMLAttr(tag, "name")), "DC.date.created") { searchFrom = metaEnd + 1 continue } content := strings.TrimSpace(extractHTMLAttr(tag, "content")) if content == "" { return nil } t, err := ParseTime(content) if err != nil { return nil } tt := t.UTC() return &tt } } func extractHTMLAttr(tag, attr string) string { lower := strings.ToLower(tag) attrLower := strings.ToLower(attr) for i := 0; i < len(lower); i++ { idx := strings.Index(lower[i:], attrLower) if idx < 0 { return "" } idx += i if idx > 0 { prev := lower[idx-1] if isAttrNameChar(prev) { i = idx + len(attrLower) continue } } j := idx + len(attrLower) for j < len(lower) && isHTMLSpace(lower[j]) { j++ } if j >= len(lower) || lower[j] != '=' { i = idx + len(attrLower) continue } j++ for j < len(lower) && isHTMLSpace(lower[j]) { j++ } if j >= len(tag) { return "" } quote := tag[j] if quote != '"' && quote != '\'' { return "" } j++ k := j for k < len(tag) && tag[k] != quote { k++ } if k >= len(tag) { return "" } return html.UnescapeString(tag[j:k]) } return "" } func isHTMLSpace(b byte) bool { switch b { case ' ', '\n', '\r', '\t', '\f': return true default: return false } } func isAttrNameChar(b byte) bool { switch { case b >= 'a' && b <= 'z': return true case b >= 'A' && b <= 'Z': return true case b >= '0' && b <= '9': return true case b == '-' || b == '_' || b == ':': return true default: return false } } func splitLines(text string) []string { text = strings.ReplaceAll(text, "\r\n", "\n") text = strings.ReplaceAll(text, "\r", "\n") return strings.Split(text, "\n") } func parseForecastDiscussionOfficeID(lines []string) string { for _, raw := range lines { line := strings.TrimSpace(raw) if m := forecastDiscussionAFDRE.FindStringSubmatch(line); len(m) == 2 { return m[1] } } for _, raw := range lines { line := strings.TrimSpace(raw) if m := forecastDiscussionWMORE.FindStringSubmatch(line); len(m) == 2 { return m[1] } } return "" } func parseForecastDiscussionHeader(lines []string) (string, time.Time, error) { for i, raw := range lines { line := strings.TrimSpace(raw) if !strings.HasPrefix(line, "National Weather Service ") { continue } officeName := line for j := i + 1; j < len(lines); j++ { tsLine := strings.TrimSpace(lines[j]) if tsLine == "" { continue } issuedAt, err := parseForecastDiscussionIssueTime(tsLine) if err != nil { return "", time.Time{}, fmt.Errorf("parse bulletin issuedAt %q: %w", tsLine, err) } return officeName, issuedAt.UTC(), nil } return "", time.Time{}, fmt.Errorf("missing bulletin issue time after office line") } return "", time.Time{}, fmt.Errorf("missing office header") } func parseForecastDiscussionIssueTime(line string) (time.Time, error) { line = strings.TrimSpace(line) line = strings.TrimPrefix(line, "Issued at ") line = strings.TrimSpace(line) parts := strings.Fields(line) if len(parts) != 7 { return time.Time{}, fmt.Errorf("unexpected issue time format") } loc, err := forecastDiscussionLocation(parts[2]) if err != nil { return time.Time{}, err } datePart, err := time.Parse("Mon Jan 2 2006", strings.Join(parts[3:], " ")) if err != nil { return time.Time{}, err } hour, minute, err := parseForecastDiscussionClock(parts[0], parts[1]) if err != nil { return time.Time{}, err } return time.Date( datePart.Year(), datePart.Month(), datePart.Day(), hour, minute, 0, 0, loc, ), nil } func parseForecastDiscussionClock(rawClock, rawAMPM string) (int, int, error) { clock := strings.TrimSpace(rawClock) ampm := strings.ToUpper(strings.TrimSpace(rawAMPM)) if ampm != "AM" && ampm != "PM" { return 0, 0, fmt.Errorf("unexpected meridiem %q", rawAMPM) } n, err := strconv.Atoi(clock) if err != nil { return 0, 0, fmt.Errorf("invalid clock %q", rawClock) } hour := n minute := 0 if len(clock) >= 3 { hour = n / 100 minute = n % 100 } if hour < 1 || hour > 12 { return 0, 0, fmt.Errorf("invalid hour %q", rawClock) } if minute < 0 || minute > 59 { return 0, 0, fmt.Errorf("invalid minute %q", rawClock) } if ampm == "AM" { if hour == 12 { hour = 0 } return hour, minute, nil } if hour != 12 { hour += 12 } return hour, minute, nil } func forecastDiscussionLocation(abbrev string) (*time.Location, error) { offsets := map[string]int{ "AST": -4 * 3600, "ADT": -3 * 3600, "EST": -5 * 3600, "EDT": -4 * 3600, "CST": -6 * 3600, "CDT": -5 * 3600, "MST": -7 * 3600, "MDT": -6 * 3600, "PST": -8 * 3600, "PDT": -7 * 3600, "AKST": -9 * 3600, "AKDT": -8 * 3600, "HST": -10 * 3600, "UTC": 0, "GMT": 0, } abbr := strings.ToUpper(strings.TrimSpace(abbrev)) offset, ok := offsets[abbr] if !ok { return nil, fmt.Errorf("unsupported time zone %q", abbrev) } return time.FixedZone(abbr, offset), nil } func extractForecastDiscussionSection(lines []string, section string) ([]string, bool) { target := "." + section + "..." for i, raw := range lines { line := strings.TrimSpace(raw) if !strings.HasPrefix(line, target) { continue } out := []string{line} for j := i + 1; j < len(lines); j++ { next := strings.TrimSpace(lines[j]) if next == "&&" || next == "$$" || strings.Contains(next, "WATCHES/WARNINGS/ADVISORIES") { break } if j > i+1 && isForecastDiscussionSectionHeader(next) { break } out = append(out, lines[j]) } return out, true } return nil, false } func isForecastDiscussionSectionHeader(line string) bool { return forecastDiscussionHeaderRE.MatchString(strings.TrimSpace(line)) } func parseForecastDiscussionKeyMessages(block []string) []string { if len(block) <= 1 { return nil } body := trimBlankLines(block[1:]) var messages []string var current strings.Builder flush := func() { msg := strings.TrimSpace(current.String()) if msg != "" { messages = append(messages, msg) } current.Reset() } for _, raw := range body { line := strings.TrimSpace(raw) if line == "" { continue } if strings.HasPrefix(line, "-") { flush() line = strings.TrimSpace(strings.TrimPrefix(line, "-")) current.WriteString(line) continue } if current.Len() > 0 { current.WriteByte(' ') } current.WriteString(line) } flush() return messages } func parseForecastDiscussionTextSection(block []string) (ForecastDiscussionSection, error) { if len(block) == 0 { return ForecastDiscussionSection{}, fmt.Errorf("empty section") } section := ForecastDiscussionSection{ Qualifier: parseForecastDiscussionQualifier(strings.TrimSpace(block[0])), } body := trimBlankLines(block[1:]) if len(body) == 0 { return section, nil } first := strings.TrimSpace(body[0]) if strings.HasPrefix(first, "Issued at ") { issuedAt, err := parseForecastDiscussionIssueTime(first) if err != nil { return ForecastDiscussionSection{}, fmt.Errorf("parse section issuedAt %q: %w", first, err) } tt := issuedAt.UTC() section.IssuedAt = &tt body = trimBlankLines(body[1:]) } body = trimForecastDiscussionSignatureLines(body) section.Text = joinForecastDiscussionParagraphs(body) return section, nil } func parseForecastDiscussionQualifier(header string) string { m := forecastDiscussionHeaderRE.FindStringSubmatch(header) if len(m) != 3 { return "" } return strings.TrimSpace(m[2]) } func trimBlankLines(lines []string) []string { start := 0 for start < len(lines) && strings.TrimSpace(lines[start]) == "" { start++ } end := len(lines) for end > start && strings.TrimSpace(lines[end-1]) == "" { end-- } return lines[start:end] } func trimForecastDiscussionSignatureLines(lines []string) []string { lines = trimBlankLines(lines) for len(lines) > 0 { last := strings.TrimSpace(lines[len(lines)-1]) if last == "" { lines = lines[:len(lines)-1] continue } if forecastDiscussionSigRE.MatchString(last) { lines = trimBlankLines(lines[:len(lines)-1]) continue } break } return lines } func joinForecastDiscussionParagraphs(lines []string) string { lines = trimBlankLines(lines) if len(lines) == 0 { return "" } var paragraphs []string current := make([]string, 0, len(lines)) flush := func() { if len(current) == 0 { return } paragraphs = append(paragraphs, strings.Join(current, " ")) current = current[:0] } for _, raw := range lines { line := strings.TrimSpace(raw) if line == "" { flush() continue } current = append(current, line) } flush() return strings.Join(paragraphs, "\n\n") }