package nws import ( "fmt" "html" "regexp" "strconv" "strings" "time" ) type ForecastDiscussion struct { OfficeID string OfficeName string Product string IssuedAt time.Time UpdatedAt *time.Time KeyMessages []string ShortTerm *ForecastDiscussionSection LongTerm *ForecastDiscussionSection } type ForecastDiscussionSection struct { Qualifier string IssuedAt *time.Time Text string } var ( forecastDiscussionHeaderRE = regexp.MustCompile(`^\.(KEY MESSAGES|SHORT TERM|LONG TERM|AVIATION)\.\.\.(.*)$`) forecastDiscussionAFDRE = regexp.MustCompile(`^AFD([A-Z]{3})$`) forecastDiscussionWMORE = regexp.MustCompile(`\bK([A-Z]{3})\b`) forecastDiscussionSigRE = regexp.MustCompile(`^[A-Z]{2,6}$`) ) func ParseForecastDiscussionHTML(raw string) (ForecastDiscussion, error) { text, err := ExtractForecastDiscussionText(raw) if err != nil { return ForecastDiscussion{}, err } parsed, err := ParseForecastDiscussionText(text) if err != nil { return ForecastDiscussion{}, err } parsed.UpdatedAt = parseForecastDiscussionUpdatedAt(raw) return parsed, nil } func ExtractForecastDiscussionText(raw string) (string, error) { lower := strings.ToLower(raw) searchFrom := 0 for { openStart := strings.Index(lower[searchFrom:], "
block")
}
openStart += searchFrom
openEnd := strings.Index(lower[openStart:], ">")
if openEnd < 0 {
return "", fmt.Errorf("unterminated tag")
}
openEnd += openStart
tag := lower[openStart : openEnd+1]
if isGlossaryProductTag(tag) {
closeStart := strings.Index(lower[openEnd+1:], "")
if closeStart < 0 {
return "", fmt.Errorf("missing closing for glossaryProduct block")
}
closeStart += openEnd + 1
text := html.UnescapeString(raw[openEnd+1 : closeStart])
text = strings.ReplaceAll(text, "\r\n", "\n")
text = strings.ReplaceAll(text, "\r", "\n")
return text, nil
}
searchFrom = openEnd + 1
}
}
func ParseForecastDiscussionText(text string) (ForecastDiscussion, error) {
lines := splitLines(text)
officeID := parseForecastDiscussionOfficeID(lines)
officeName, issuedAt, err := parseForecastDiscussionHeader(lines)
if err != nil {
return ForecastDiscussion{}, err
}
out := ForecastDiscussion{
OfficeID: officeID,
OfficeName: officeName,
Product: "afd",
IssuedAt: issuedAt.UTC(),
}
if block, ok := extractForecastDiscussionSection(lines, "KEY MESSAGES"); ok {
out.KeyMessages = parseForecastDiscussionKeyMessages(block)
}
if block, ok := extractForecastDiscussionSection(lines, "SHORT TERM"); ok {
section, err := parseForecastDiscussionTextSection(block)
if err != nil {
return ForecastDiscussion{}, fmt.Errorf("parse SHORT TERM: %w", err)
}
out.ShortTerm = §ion
}
if block, ok := extractForecastDiscussionSection(lines, "LONG TERM"); ok {
section, err := parseForecastDiscussionTextSection(block)
if err != nil {
return ForecastDiscussion{}, fmt.Errorf("parse LONG TERM: %w", err)
}
out.LongTerm = §ion
}
return out, nil
}
// isGlossaryProductTag reports whether an opening tag carries a class
// attribute whose value is, or begins with, "glossaryproduct" (compared
// case-insensitively). Only quoted values are recognised, with either quote
// style, and the class must be the first one listed.
func isGlossaryProductTag(tag string) bool {
	normalized := strings.ToLower(tag)
	for _, quote := range []string{`"`, `'`} {
		opening := "class=" + quote + "glossaryproduct"
		// Either the value ends right here, or further classes follow.
		if strings.Contains(normalized, opening+quote) || strings.Contains(normalized, opening+" ") {
			return true
		}
	}
	return false
}
func parseForecastDiscussionUpdatedAt(raw string) *time.Time {
lower := strings.ToLower(raw)
searchFrom := 0
for {
metaStart := strings.Index(lower[searchFrom:], "")
if metaEnd < 0 {
return nil
}
metaEnd += metaStart
tag := raw[metaStart : metaEnd+1]
if !strings.EqualFold(strings.TrimSpace(extractHTMLAttr(tag, "name")), "DC.date.created") {
searchFrom = metaEnd + 1
continue
}
content := strings.TrimSpace(extractHTMLAttr(tag, "content"))
if content == "" {
return nil
}
t, err := ParseTime(content)
if err != nil {
return nil
}
tt := t.UTC()
return &tt
}
}
func extractHTMLAttr(tag, attr string) string {
lower := strings.ToLower(tag)
attrLower := strings.ToLower(attr)
for i := 0; i < len(lower); i++ {
idx := strings.Index(lower[i:], attrLower)
if idx < 0 {
return ""
}
idx += i
if idx > 0 {
prev := lower[idx-1]
if isAttrNameChar(prev) {
i = idx + len(attrLower)
continue
}
}
j := idx + len(attrLower)
for j < len(lower) && isHTMLSpace(lower[j]) {
j++
}
if j >= len(lower) || lower[j] != '=' {
i = idx + len(attrLower)
continue
}
j++
for j < len(lower) && isHTMLSpace(lower[j]) {
j++
}
if j >= len(tag) {
return ""
}
quote := tag[j]
if quote != '"' && quote != '\'' {
return ""
}
j++
k := j
for k < len(tag) && tag[k] != quote {
k++
}
if k >= len(tag) {
return ""
}
return html.UnescapeString(tag[j:k])
}
return ""
}
// isHTMLSpace reports whether b is a space, line feed, carriage return, tab
// or form feed.
func isHTMLSpace(b byte) bool {
	return b == ' ' || b == '\n' || b == '\r' || b == '\t' || b == '\f'
}
// isAttrNameChar reports whether b is an ASCII letter, an ASCII digit, or one
// of '-', '_' and ':'.
func isAttrNameChar(b byte) bool {
	isLetter := ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z')
	isDigit := '0' <= b && b <= '9'
	isPunct := b == '-' || b == '_' || b == ':'
	return isLetter || isDigit || isPunct
}
// splitLines breaks text into lines, treating "\r\n", a lone "\r" and "\n"
// all as line terminators. The result always has at least one element.
func splitLines(text string) []string {
	// "\r\n" is listed first so a CRLF pair becomes one newline, not two.
	normalizer := strings.NewReplacer("\r\n", "\n", "\r", "\n")
	return strings.Split(normalizer.Replace(text), "\n")
}
// parseForecastDiscussionOfficeID returns the three-letter office code of a
// bulletin. A line that is exactly "AFDxxx" anywhere in the text takes
// precedence; only if none exists is the first standalone "Kxxx" token used.
// It returns "" when neither pattern is found.
func parseForecastDiscussionOfficeID(lines []string) string {
	// Patterns are tried in priority order, each over the whole bulletin.
	patterns := []*regexp.Regexp{forecastDiscussionAFDRE, forecastDiscussionWMORE}
	for _, pattern := range patterns {
		for _, candidate := range lines {
			groups := pattern.FindStringSubmatch(strings.TrimSpace(candidate))
			if len(groups) == 2 {
				return groups[1]
			}
		}
	}
	return ""
}
// parseForecastDiscussionHeader locates the first line starting with
// "National Weather Service " and parses the next non-blank line as the
// bulletin issue time.
//
// It returns the trimmed office line and the issue time in UTC. An error is
// returned when there is no office line, when nothing but blank lines follow
// it, or when the following line is not a valid issue time.
func parseForecastDiscussionHeader(lines []string) (string, time.Time, error) {
	const officePrefix = "National Weather Service "

	officeAt := -1
	for idx, candidate := range lines {
		if strings.HasPrefix(strings.TrimSpace(candidate), officePrefix) {
			officeAt = idx
			break
		}
	}
	if officeAt < 0 {
		return "", time.Time{}, fmt.Errorf("missing office header")
	}

	officeName := strings.TrimSpace(lines[officeAt])
	for _, candidate := range lines[officeAt+1:] {
		stamp := strings.TrimSpace(candidate)
		if stamp == "" {
			continue
		}
		// The first non-blank line decides the outcome either way.
		issuedAt, err := parseForecastDiscussionIssueTime(stamp)
		if err != nil {
			return "", time.Time{}, fmt.Errorf("parse bulletin issuedAt %q: %w", stamp, err)
		}
		return officeName, issuedAt.UTC(), nil
	}
	return "", time.Time{}, fmt.Errorf("missing bulletin issue time after office line")
}
// parseForecastDiscussionIssueTime parses an issue-time line made of exactly
// seven whitespace-separated fields: clock, AM/PM, zone abbreviation,
// weekday, month, day and year (for example "345 PM EDT Mon Jan 2 2006").
// A leading "Issued at " is ignored.
//
// The returned time carries the fixed-offset zone named on the line. The
// zone is validated first, then the date, then the clock.
func parseForecastDiscussionIssueTime(line string) (time.Time, error) {
	stamp := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(line), "Issued at "))
	fields := strings.Fields(stamp)
	if len(fields) != 7 {
		return time.Time{}, fmt.Errorf("unexpected issue time format")
	}
	clockField, meridiemField, zoneField := fields[0], fields[1], fields[2]

	zone, err := forecastDiscussionLocation(zoneField)
	if err != nil {
		return time.Time{}, err
	}
	day, err := time.Parse("Mon Jan 2 2006", strings.Join(fields[3:], " "))
	if err != nil {
		return time.Time{}, err
	}
	hour, minute, err := parseForecastDiscussionClock(clockField, meridiemField)
	if err != nil {
		return time.Time{}, err
	}

	// Combine the calendar date with the wall-clock time in the named zone.
	year, month, dayOfMonth := day.Date()
	return time.Date(year, month, dayOfMonth, hour, minute, 0, 0, zone), nil
}
// parseForecastDiscussionClock converts a 12-hour clock such as "345" with
// "PM" into a 24-hour hour and minute (15, 45).
//
// A clock of one or two characters is read as a bare hour; three or more
// characters are read as hour*100 + minute. The meridiem is matched
// case-insensitively. An error is returned for an unknown meridiem, a
// non-numeric clock, an hour outside 1-12 or a minute outside 0-59.
func parseForecastDiscussionClock(rawClock, rawAMPM string) (int, int, error) {
	digits := strings.TrimSpace(rawClock)

	isPM := false
	switch strings.ToUpper(strings.TrimSpace(rawAMPM)) {
	case "AM":
	case "PM":
		isPM = true
	default:
		return 0, 0, fmt.Errorf("unexpected meridiem %q", rawAMPM)
	}

	value, err := strconv.Atoi(digits)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid clock %q", rawClock)
	}

	hour, minute := value, 0
	if len(digits) >= 3 {
		hour, minute = value/100, value%100
	}
	switch {
	case hour < 1 || hour > 12:
		return 0, 0, fmt.Errorf("invalid hour %q", rawClock)
	case minute < 0 || minute > 59:
		return 0, 0, fmt.Errorf("invalid minute %q", rawClock)
	}

	// 12 AM is hour 0 and 12 PM is hour 12; every other PM hour gains 12.
	hour %= 12
	if isPM {
		hour += 12
	}
	return hour, minute, nil
}
// forecastDiscussionLocation maps a time zone abbreviation, as printed on an
// issue-time line, to a fixed-offset location named after the upper-cased
// abbreviation. Matching ignores case and surrounding whitespace.
//
// Besides the Atlantic, continental US, Alaska and Hawaii abbreviations it
// accepts ChST (Chamorro Standard Time, UTC+10) and SST (Samoa Standard
// Time, UTC-11). Any other abbreviation yields an
// "unsupported time zone" error.
func forecastDiscussionLocation(abbrev string) (*time.Location, error) {
	abbr := strings.ToUpper(strings.TrimSpace(abbrev))

	// Offset from UTC in whole hours; a switch avoids building a map on
	// every call.
	var hours int
	switch abbr {
	case "UTC", "GMT":
		hours = 0
	case "ADT":
		hours = -3
	case "AST", "EDT":
		hours = -4
	case "EST", "CDT":
		hours = -5
	case "CST", "MDT":
		hours = -6
	case "MST", "PDT":
		hours = -7
	case "PST", "AKDT":
		hours = -8
	case "AKST":
		hours = -9
	case "HST":
		hours = -10
	case "SST":
		hours = -11
	case "CHST":
		hours = 10
	default:
		return nil, fmt.Errorf("unsupported time zone %q", abbrev)
	}
	return time.FixedZone(abbr, hours*3600), nil
}
// extractForecastDiscussionSection returns the lines of the first section
// whose header starts with ".<section>...". The first element is the trimmed
// header line; the remaining elements are the untrimmed body lines.
//
// The body stops before a "&&" or "$$" line, before any line containing
// "WATCHES/WARNINGS/ADVISORIES", or before the next recognised section
// header. The line directly after the header is never treated as a header.
// The boolean is false when the section is not present.
func extractForecastDiscussionSection(lines []string, section string) ([]string, bool) {
	marker := "." + section + "..."

	headerAt := -1
	for idx := range lines {
		if strings.HasPrefix(strings.TrimSpace(lines[idx]), marker) {
			headerAt = idx
			break
		}
	}
	if headerAt < 0 {
		return nil, false
	}

	collected := []string{strings.TrimSpace(lines[headerAt])}
	for idx := headerAt + 1; idx < len(lines); idx++ {
		trimmed := strings.TrimSpace(lines[idx])
		switch {
		case trimmed == "&&", trimmed == "$$":
			return collected, true
		case strings.Contains(trimmed, "WATCHES/WARNINGS/ADVISORIES"):
			return collected, true
		case idx > headerAt+1 && isForecastDiscussionSectionHeader(trimmed):
			return collected, true
		}
		collected = append(collected, lines[idx])
	}
	return collected, true
}
// isForecastDiscussionSectionHeader reports whether line, ignoring
// surrounding whitespace, matches forecastDiscussionHeaderRE.
func isForecastDiscussionSectionHeader(line string) bool {
	trimmed := strings.TrimSpace(line)
	return forecastDiscussionHeaderRE.MatchString(trimmed)
}
// parseForecastDiscussionKeyMessages turns a KEY MESSAGES block (header line
// first) into one string per bullet.
//
// A line starting with "-" begins a new message; any other non-blank line is
// appended to the current message, separated by a single space. Blank lines
// are ignored and empty messages are dropped. It returns nil when the block
// has no body or yields no messages.
func parseForecastDiscussionKeyMessages(block []string) []string {
	if len(block) < 2 {
		return nil
	}

	var (
		messages []string
		parts    []string // trimmed, non-empty fragments of the current message
	)
	emit := func() {
		if joined := strings.TrimSpace(strings.Join(parts, " ")); joined != "" {
			messages = append(messages, joined)
		}
		parts = parts[:0]
	}

	for _, raw := range trimBlankLines(block[1:]) {
		text := strings.TrimSpace(raw)
		if text == "" {
			continue
		}
		if strings.HasPrefix(text, "-") {
			// Close the previous bullet; only one leading dash is removed.
			emit()
			text = strings.TrimSpace(strings.TrimPrefix(text, "-"))
		}
		if text != "" {
			parts = append(parts, text)
		}
	}
	emit()
	return messages
}
// parseForecastDiscussionTextSection parses a free-text section block whose
// first element is the header line.
//
// The qualifier comes from the header. If the first non-blank body line
// starts with "Issued at ", it is parsed as the section's issue time (stored
// in UTC) and removed from the body. Trailing signature lines are then
// stripped and the rest is joined into paragraphs. An empty block or an
// unparseable "Issued at" line is an error.
func parseForecastDiscussionTextSection(block []string) (ForecastDiscussionSection, error) {
	var result ForecastDiscussionSection
	if len(block) == 0 {
		return result, fmt.Errorf("empty section")
	}
	result.Qualifier = parseForecastDiscussionQualifier(strings.TrimSpace(block[0]))

	content := trimBlankLines(block[1:])
	if len(content) == 0 {
		return result, nil
	}

	if lead := strings.TrimSpace(content[0]); strings.HasPrefix(lead, "Issued at ") {
		stamp, err := parseForecastDiscussionIssueTime(lead)
		if err != nil {
			return ForecastDiscussionSection{}, fmt.Errorf("parse section issuedAt %q: %w", lead, err)
		}
		utc := stamp.UTC()
		result.IssuedAt = &utc
		content = trimBlankLines(content[1:])
	}

	result.Text = joinForecastDiscussionParagraphs(trimForecastDiscussionSignatureLines(content))
	return result, nil
}
// parseForecastDiscussionQualifier returns the trimmed text that follows the
// "..." on a section header line, or "" when header does not match
// forecastDiscussionHeaderRE.
func parseForecastDiscussionQualifier(header string) string {
	if groups := forecastDiscussionHeaderRE.FindStringSubmatch(header); len(groups) == 3 {
		return strings.TrimSpace(groups[2])
	}
	return ""
}
// trimBlankLines returns the sub-slice of lines with leading and trailing
// whitespace-only lines removed. Blank lines in the interior are kept. The
// result shares its backing array with the input.
func trimBlankLines(lines []string) []string {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	for len(lines) > 0 && isBlank(lines[0]) {
		lines = lines[1:]
	}
	for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
		lines = lines[:len(lines)-1]
	}
	return lines
}
// trimForecastDiscussionSignatureLines removes trailing signature lines,
// i.e. lines matching forecastDiscussionSigRE, along with the blank lines
// around them. Stripping repeats until the last remaining line is neither
// blank nor a signature.
func trimForecastDiscussionSignatureLines(lines []string) []string {
	kept := trimBlankLines(lines)
	for n := len(kept); n > 0; n = len(kept) {
		tail := strings.TrimSpace(kept[n-1])
		switch {
		case tail == "":
			kept = kept[:n-1]
		case forecastDiscussionSigRE.MatchString(tail):
			// Drop the signature and any blank lines that preceded it.
			kept = trimBlankLines(kept[:n-1])
		default:
			return kept
		}
	}
	return kept
}
// joinForecastDiscussionParagraphs reflows hard-wrapped text. Consecutive
// non-blank lines are trimmed and joined with a single space; each run of
// blank lines becomes one paragraph break ("\n\n"). Leading and trailing
// blank lines are ignored, and empty input yields "".
func joinForecastDiscussionParagraphs(lines []string) string {
	var out strings.Builder
	paragraphBreak := false // a blank line was seen since the last text line

	for _, raw := range trimBlankLines(lines) {
		text := strings.TrimSpace(raw)
		if text == "" {
			paragraphBreak = true
			continue
		}
		switch {
		case out.Len() == 0:
			// First text line: nothing to separate from.
		case paragraphBreak:
			out.WriteString("\n\n")
		default:
			out.WriteByte(' ')
		}
		out.WriteString(text)
		paragraphBreak = false
	}
	return out.String()
}