Added support for Area Forecast Discussions issued by the NWS
All checks were successful
ci/woodpecker/push/build-image Pipeline was successful

This commit is contained in:
2026-03-28 16:17:03 -05:00
parent 40f17c9d86
commit a0389ebce8
20 changed files with 1463 additions and 24 deletions

View File

@@ -0,0 +1,552 @@
package nws
import (
"fmt"
"html"
"regexp"
"strconv"
"strings"
"time"
)
type ForecastDiscussion struct {
OfficeID string
OfficeName string
Product string
IssuedAt time.Time
UpdatedAt *time.Time
KeyMessages []string
ShortTerm *ForecastDiscussionSection
LongTerm *ForecastDiscussionSection
}
type ForecastDiscussionSection struct {
Qualifier string
IssuedAt *time.Time
Text string
}
var (
forecastDiscussionHeaderRE = regexp.MustCompile(`^\.(KEY MESSAGES|SHORT TERM|LONG TERM|AVIATION)\.\.\.(.*)$`)
forecastDiscussionAFDRE = regexp.MustCompile(`^AFD([A-Z]{3})$`)
forecastDiscussionWMORE = regexp.MustCompile(`\bK([A-Z]{3})\b`)
forecastDiscussionSigRE = regexp.MustCompile(`^[A-Z]{2,6}$`)
)
func ParseForecastDiscussionHTML(raw string) (ForecastDiscussion, error) {
text, err := ExtractForecastDiscussionText(raw)
if err != nil {
return ForecastDiscussion{}, err
}
parsed, err := ParseForecastDiscussionText(text)
if err != nil {
return ForecastDiscussion{}, err
}
parsed.UpdatedAt = parseForecastDiscussionUpdatedAt(raw)
return parsed, nil
}
func ExtractForecastDiscussionText(raw string) (string, error) {
lower := strings.ToLower(raw)
searchFrom := 0
for {
openStart := strings.Index(lower[searchFrom:], "<pre")
if openStart < 0 {
return "", fmt.Errorf("missing <pre class=\"glossaryProduct\"> block")
}
openStart += searchFrom
openEnd := strings.Index(lower[openStart:], ">")
if openEnd < 0 {
return "", fmt.Errorf("unterminated <pre> tag")
}
openEnd += openStart
tag := lower[openStart : openEnd+1]
if isGlossaryProductTag(tag) {
closeStart := strings.Index(lower[openEnd+1:], "</pre>")
if closeStart < 0 {
return "", fmt.Errorf("missing closing </pre> for glossaryProduct block")
}
closeStart += openEnd + 1
text := html.UnescapeString(raw[openEnd+1 : closeStart])
text = strings.ReplaceAll(text, "\r\n", "\n")
text = strings.ReplaceAll(text, "\r", "\n")
return text, nil
}
searchFrom = openEnd + 1
}
}
func ParseForecastDiscussionText(text string) (ForecastDiscussion, error) {
lines := splitLines(text)
officeID := parseForecastDiscussionOfficeID(lines)
officeName, issuedAt, err := parseForecastDiscussionHeader(lines)
if err != nil {
return ForecastDiscussion{}, err
}
out := ForecastDiscussion{
OfficeID: officeID,
OfficeName: officeName,
Product: "afd",
IssuedAt: issuedAt.UTC(),
}
if block, ok := extractForecastDiscussionSection(lines, "KEY MESSAGES"); ok {
out.KeyMessages = parseForecastDiscussionKeyMessages(block)
}
if block, ok := extractForecastDiscussionSection(lines, "SHORT TERM"); ok {
section, err := parseForecastDiscussionTextSection(block)
if err != nil {
return ForecastDiscussion{}, fmt.Errorf("parse SHORT TERM: %w", err)
}
out.ShortTerm = &section
}
if block, ok := extractForecastDiscussionSection(lines, "LONG TERM"); ok {
section, err := parseForecastDiscussionTextSection(block)
if err != nil {
return ForecastDiscussion{}, fmt.Errorf("parse LONG TERM: %w", err)
}
out.LongTerm = &section
}
return out, nil
}
func isGlossaryProductTag(tag string) bool {
tag = strings.ToLower(tag)
return strings.Contains(tag, `class="glossaryproduct"`) ||
strings.Contains(tag, `class='glossaryproduct'`) ||
strings.Contains(tag, `class="glossaryproduct `) ||
strings.Contains(tag, `class='glossaryproduct `)
}
func parseForecastDiscussionUpdatedAt(raw string) *time.Time {
lower := strings.ToLower(raw)
searchFrom := 0
for {
metaStart := strings.Index(lower[searchFrom:], "<meta")
if metaStart < 0 {
return nil
}
metaStart += searchFrom
metaEnd := strings.Index(lower[metaStart:], ">")
if metaEnd < 0 {
return nil
}
metaEnd += metaStart
tag := raw[metaStart : metaEnd+1]
if !strings.EqualFold(strings.TrimSpace(extractHTMLAttr(tag, "name")), "DC.date.created") {
searchFrom = metaEnd + 1
continue
}
content := strings.TrimSpace(extractHTMLAttr(tag, "content"))
if content == "" {
return nil
}
t, err := ParseTime(content)
if err != nil {
return nil
}
tt := t.UTC()
return &tt
}
}
func extractHTMLAttr(tag, attr string) string {
lower := strings.ToLower(tag)
attrLower := strings.ToLower(attr)
for i := 0; i < len(lower); i++ {
idx := strings.Index(lower[i:], attrLower)
if idx < 0 {
return ""
}
idx += i
if idx > 0 {
prev := lower[idx-1]
if isAttrNameChar(prev) {
i = idx + len(attrLower)
continue
}
}
j := idx + len(attrLower)
for j < len(lower) && isHTMLSpace(lower[j]) {
j++
}
if j >= len(lower) || lower[j] != '=' {
i = idx + len(attrLower)
continue
}
j++
for j < len(lower) && isHTMLSpace(lower[j]) {
j++
}
if j >= len(tag) {
return ""
}
quote := tag[j]
if quote != '"' && quote != '\'' {
return ""
}
j++
k := j
for k < len(tag) && tag[k] != quote {
k++
}
if k >= len(tag) {
return ""
}
return html.UnescapeString(tag[j:k])
}
return ""
}
func isHTMLSpace(b byte) bool {
switch b {
case ' ', '\n', '\r', '\t', '\f':
return true
default:
return false
}
}
func isAttrNameChar(b byte) bool {
switch {
case b >= 'a' && b <= 'z':
return true
case b >= 'A' && b <= 'Z':
return true
case b >= '0' && b <= '9':
return true
case b == '-' || b == '_' || b == ':':
return true
default:
return false
}
}
func splitLines(text string) []string {
text = strings.ReplaceAll(text, "\r\n", "\n")
text = strings.ReplaceAll(text, "\r", "\n")
return strings.Split(text, "\n")
}
func parseForecastDiscussionOfficeID(lines []string) string {
for _, raw := range lines {
line := strings.TrimSpace(raw)
if m := forecastDiscussionAFDRE.FindStringSubmatch(line); len(m) == 2 {
return m[1]
}
}
for _, raw := range lines {
line := strings.TrimSpace(raw)
if m := forecastDiscussionWMORE.FindStringSubmatch(line); len(m) == 2 {
return m[1]
}
}
return ""
}
func parseForecastDiscussionHeader(lines []string) (string, time.Time, error) {
for i, raw := range lines {
line := strings.TrimSpace(raw)
if !strings.HasPrefix(line, "National Weather Service ") {
continue
}
officeName := line
for j := i + 1; j < len(lines); j++ {
tsLine := strings.TrimSpace(lines[j])
if tsLine == "" {
continue
}
issuedAt, err := parseForecastDiscussionIssueTime(tsLine)
if err != nil {
return "", time.Time{}, fmt.Errorf("parse bulletin issuedAt %q: %w", tsLine, err)
}
return officeName, issuedAt.UTC(), nil
}
return "", time.Time{}, fmt.Errorf("missing bulletin issue time after office line")
}
return "", time.Time{}, fmt.Errorf("missing office header")
}
func parseForecastDiscussionIssueTime(line string) (time.Time, error) {
line = strings.TrimSpace(line)
line = strings.TrimPrefix(line, "Issued at ")
line = strings.TrimSpace(line)
parts := strings.Fields(line)
if len(parts) != 7 {
return time.Time{}, fmt.Errorf("unexpected issue time format")
}
loc, err := forecastDiscussionLocation(parts[2])
if err != nil {
return time.Time{}, err
}
datePart, err := time.Parse("Mon Jan 2 2006", strings.Join(parts[3:], " "))
if err != nil {
return time.Time{}, err
}
hour, minute, err := parseForecastDiscussionClock(parts[0], parts[1])
if err != nil {
return time.Time{}, err
}
return time.Date(
datePart.Year(),
datePart.Month(),
datePart.Day(),
hour,
minute,
0,
0,
loc,
), nil
}
func parseForecastDiscussionClock(rawClock, rawAMPM string) (int, int, error) {
clock := strings.TrimSpace(rawClock)
ampm := strings.ToUpper(strings.TrimSpace(rawAMPM))
if ampm != "AM" && ampm != "PM" {
return 0, 0, fmt.Errorf("unexpected meridiem %q", rawAMPM)
}
n, err := strconv.Atoi(clock)
if err != nil {
return 0, 0, fmt.Errorf("invalid clock %q", rawClock)
}
hour := n
minute := 0
if len(clock) >= 3 {
hour = n / 100
minute = n % 100
}
if hour < 1 || hour > 12 {
return 0, 0, fmt.Errorf("invalid hour %q", rawClock)
}
if minute < 0 || minute > 59 {
return 0, 0, fmt.Errorf("invalid minute %q", rawClock)
}
if ampm == "AM" {
if hour == 12 {
hour = 0
}
return hour, minute, nil
}
if hour != 12 {
hour += 12
}
return hour, minute, nil
}
func forecastDiscussionLocation(abbrev string) (*time.Location, error) {
offsets := map[string]int{
"AST": -4 * 3600,
"ADT": -3 * 3600,
"EST": -5 * 3600,
"EDT": -4 * 3600,
"CST": -6 * 3600,
"CDT": -5 * 3600,
"MST": -7 * 3600,
"MDT": -6 * 3600,
"PST": -8 * 3600,
"PDT": -7 * 3600,
"AKST": -9 * 3600,
"AKDT": -8 * 3600,
"HST": -10 * 3600,
"UTC": 0,
"GMT": 0,
}
abbr := strings.ToUpper(strings.TrimSpace(abbrev))
offset, ok := offsets[abbr]
if !ok {
return nil, fmt.Errorf("unsupported time zone %q", abbrev)
}
return time.FixedZone(abbr, offset), nil
}
func extractForecastDiscussionSection(lines []string, section string) ([]string, bool) {
target := "." + section + "..."
for i, raw := range lines {
line := strings.TrimSpace(raw)
if !strings.HasPrefix(line, target) {
continue
}
out := []string{line}
for j := i + 1; j < len(lines); j++ {
next := strings.TrimSpace(lines[j])
if next == "&&" || next == "$$" || strings.Contains(next, "WATCHES/WARNINGS/ADVISORIES") {
break
}
if j > i+1 && isForecastDiscussionSectionHeader(next) {
break
}
out = append(out, lines[j])
}
return out, true
}
return nil, false
}
func isForecastDiscussionSectionHeader(line string) bool {
return forecastDiscussionHeaderRE.MatchString(strings.TrimSpace(line))
}
func parseForecastDiscussionKeyMessages(block []string) []string {
if len(block) <= 1 {
return nil
}
body := trimBlankLines(block[1:])
var messages []string
var current strings.Builder
flush := func() {
msg := strings.TrimSpace(current.String())
if msg != "" {
messages = append(messages, msg)
}
current.Reset()
}
for _, raw := range body {
line := strings.TrimSpace(raw)
if line == "" {
continue
}
if strings.HasPrefix(line, "-") {
flush()
line = strings.TrimSpace(strings.TrimPrefix(line, "-"))
current.WriteString(line)
continue
}
if current.Len() > 0 {
current.WriteByte(' ')
}
current.WriteString(line)
}
flush()
return messages
}
func parseForecastDiscussionTextSection(block []string) (ForecastDiscussionSection, error) {
if len(block) == 0 {
return ForecastDiscussionSection{}, fmt.Errorf("empty section")
}
section := ForecastDiscussionSection{
Qualifier: parseForecastDiscussionQualifier(strings.TrimSpace(block[0])),
}
body := trimBlankLines(block[1:])
if len(body) == 0 {
return section, nil
}
first := strings.TrimSpace(body[0])
if strings.HasPrefix(first, "Issued at ") {
issuedAt, err := parseForecastDiscussionIssueTime(first)
if err != nil {
return ForecastDiscussionSection{}, fmt.Errorf("parse section issuedAt %q: %w", first, err)
}
tt := issuedAt.UTC()
section.IssuedAt = &tt
body = trimBlankLines(body[1:])
}
body = trimForecastDiscussionSignatureLines(body)
section.Text = joinForecastDiscussionParagraphs(body)
return section, nil
}
func parseForecastDiscussionQualifier(header string) string {
m := forecastDiscussionHeaderRE.FindStringSubmatch(header)
if len(m) != 3 {
return ""
}
return strings.TrimSpace(m[2])
}
func trimBlankLines(lines []string) []string {
start := 0
for start < len(lines) && strings.TrimSpace(lines[start]) == "" {
start++
}
end := len(lines)
for end > start && strings.TrimSpace(lines[end-1]) == "" {
end--
}
return lines[start:end]
}
func trimForecastDiscussionSignatureLines(lines []string) []string {
lines = trimBlankLines(lines)
for len(lines) > 0 {
last := strings.TrimSpace(lines[len(lines)-1])
if last == "" {
lines = lines[:len(lines)-1]
continue
}
if forecastDiscussionSigRE.MatchString(last) {
lines = trimBlankLines(lines[:len(lines)-1])
continue
}
break
}
return lines
}
func joinForecastDiscussionParagraphs(lines []string) string {
lines = trimBlankLines(lines)
if len(lines) == 0 {
return ""
}
var paragraphs []string
current := make([]string, 0, len(lines))
flush := func() {
if len(current) == 0 {
return
}
paragraphs = append(paragraphs, strings.Join(current, " "))
current = current[:0]
}
for _, raw := range lines {
line := strings.TrimSpace(raw)
if line == "" {
flush()
continue
}
current = append(current, line)
}
flush()
return strings.Join(paragraphs, "\n\n")
}

View File

@@ -0,0 +1,118 @@
package nws
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
)
func TestParseForecastDiscussionHTMLParsesExpectedFields(t *testing.T) {
raw := loadForecastDiscussionSampleHTML(t)
got, err := ParseForecastDiscussionHTML(raw)
if err != nil {
t.Fatalf("ParseForecastDiscussionHTML() error = %v", err)
}
if got.OfficeID != "LSX" {
t.Fatalf("OfficeID = %q, want LSX", got.OfficeID)
}
if got.OfficeName != "National Weather Service Saint Louis MO" {
t.Fatalf("OfficeName = %q", got.OfficeName)
}
if got.Product != "afd" {
t.Fatalf("Product = %q, want afd", got.Product)
}
wantIssuedAt := time.Date(2026, 3, 28, 19, 24, 0, 0, time.UTC)
if !got.IssuedAt.Equal(wantIssuedAt) {
t.Fatalf("IssuedAt = %s, want %s", got.IssuedAt.Format(time.RFC3339), wantIssuedAt.Format(time.RFC3339))
}
wantUpdatedAt := time.Date(2026, 3, 28, 20, 29, 47, 0, time.UTC)
if got.UpdatedAt == nil || !got.UpdatedAt.Equal(wantUpdatedAt) {
t.Fatalf("UpdatedAt = %v, want %s", got.UpdatedAt, wantUpdatedAt.Format(time.RFC3339))
}
wantMessages := []string{
"Elevated fire danger conditions are expected across a broad area tomorrow afternoon due to breezy southwest winds and low humidity.",
"Very warm temperatures are expected once again Monday and Tuesday, with highs well into the 80s.",
"A cold front late Tuesday or early Wednesday brings our next chance of thunderstorms, followed by a cooldown and possibly more chances for rain later in the week.",
}
if len(got.KeyMessages) != len(wantMessages) {
t.Fatalf("KeyMessages len = %d, want %d", len(got.KeyMessages), len(wantMessages))
}
for i := range wantMessages {
if got.KeyMessages[i] != wantMessages[i] {
t.Fatalf("KeyMessages[%d] = %q, want %q", i, got.KeyMessages[i], wantMessages[i])
}
}
if got.ShortTerm == nil {
t.Fatalf("ShortTerm is nil")
}
if got.ShortTerm.Qualifier != "(Through Late Sunday Night)" {
t.Fatalf("ShortTerm.Qualifier = %q", got.ShortTerm.Qualifier)
}
if got.ShortTerm.IssuedAt == nil || !got.ShortTerm.IssuedAt.Equal(time.Date(2026, 3, 28, 19, 19, 0, 0, time.UTC)) {
t.Fatalf("ShortTerm.IssuedAt = %v", got.ShortTerm.IssuedAt)
}
if !strings.Contains(got.ShortTerm.Text, "After a chilly morning") {
t.Fatalf("ShortTerm.Text missing expected prose: %q", got.ShortTerm.Text)
}
if strings.Contains(got.ShortTerm.Text, "BRC") {
t.Fatalf("ShortTerm.Text should not include signature: %q", got.ShortTerm.Text)
}
if strings.Contains(got.ShortTerm.Text, "\n\n\n") {
t.Fatalf("ShortTerm.Text contains unexpected paragraph breaks: %q", got.ShortTerm.Text)
}
if got.LongTerm == nil {
t.Fatalf("LongTerm is nil")
}
if got.LongTerm.Qualifier != "(Monday through Next Saturday)" {
t.Fatalf("LongTerm.Qualifier = %q", got.LongTerm.Qualifier)
}
if got.LongTerm.IssuedAt == nil || !got.LongTerm.IssuedAt.Equal(time.Date(2026, 3, 28, 19, 19, 0, 0, time.UTC)) {
t.Fatalf("LongTerm.IssuedAt = %v", got.LongTerm.IssuedAt)
}
if !strings.Contains(got.LongTerm.Text, "The peak of the warmth arrives Monday and Tuesday") {
t.Fatalf("LongTerm.Text missing expected prose: %q", got.LongTerm.Text)
}
if strings.Contains(got.LongTerm.Text, "AVIATION") || strings.Contains(got.LongTerm.Text, "WATCHES/WARNINGS/ADVISORIES") {
t.Fatalf("LongTerm.Text includes content from other sections: %q", got.LongTerm.Text)
}
}
func TestParseForecastDiscussionHTMLMissingPreBlock(t *testing.T) {
_, err := ParseForecastDiscussionHTML("<html><body><div>no pre block</div></body></html>")
if err == nil {
t.Fatalf("ParseForecastDiscussionHTML() error = nil, want error")
}
if !strings.Contains(err.Error(), "glossaryProduct") {
t.Fatalf("error = %q, want glossaryProduct context", err)
}
}
func TestParseForecastDiscussionTextMissingIssueTime(t *testing.T) {
_, err := ParseForecastDiscussionText("National Weather Service Saint Louis MO\n\n.KEY MESSAGES...\n- Test")
if err == nil {
t.Fatalf("ParseForecastDiscussionText() error = nil, want error")
}
if !strings.Contains(err.Error(), "issue time") {
t.Fatalf("error = %q, want issue time context", err)
}
}
func loadForecastDiscussionSampleHTML(t *testing.T) string {
t.Helper()
path := filepath.Join("testdata", "forecast_discussion_sample.html")
b, err := os.ReadFile(path)
if err != nil {
t.Fatalf("os.ReadFile(%q) error = %v", path, err)
}
return string(b)
}

View File

@@ -0,0 +1,84 @@
<!DOCTYPE html>
<html class="no-js">
<head>
<meta name="DC.date.created" scheme="ISO8601" content="2026-03-28T20:29:47+00:00" />
<title>National Weather Service</title>
</head>
<body>
<pre class="glossaryProduct">
988
FXUS63 KLSX 281924
AFDLSX
Area Forecast Discussion
National Weather Service Saint Louis MO
224 PM CDT Sat Mar 28 2026
.KEY MESSAGES...
- Elevated fire danger conditions are expected across a broad area
tomorrow afternoon due to breezy southwest winds and low
humidity.
- Very warm temperatures are expected once again Monday and
Tuesday, with highs well into the 80s.
- A cold front late Tuesday or early Wednesday brings our next
chance of thunderstorms, followed by a cooldown and possibly
more chances for rain later in the week.
&&
.SHORT TERM... (Through Late Sunday Night)
Issued at 219 PM CDT Sat Mar 28 2026
After a chilly morning that saw widespread freezing temperatures,
another warmup is in store over the next several days as southerly
winds become re-established. We will also see the return of
shower/thunderstorm chances Tuesday onward as we enter a more
unsettled pattern.
In the near-term, the focus continues to be on some lingering fire
weather potential thanks to the presence of an exceptionally dry
airmass.
BRC
&&
.LONG TERM... (Monday through Next Saturday)
Issued at 219 PM CDT Sat Mar 28 2026
The peak of the warmth arrives Monday and Tuesday, as a broad, but
low-amplitude ridge nudges eastward and steady warm/moist advection
continues on both days.
Wednesday onward, the day-to-day details become much less clear, but
latest trends suggest that an active/wet pattern will likely
continue as another more substantial trough follows with additional
chances for showers/thunderstorms late in the week.
BRC
&&
.AVIATION... (For the 18z TAFs through 18z Sunday Afternoon)
Issued at 1133 AM CDT Sat Mar 28 2026
VFR conditions are expected throughout the 18Z TAF period.
BRC
&&
.LSX WATCHES/WARNINGS/ADVISORIES...
MO...None.
IL...None.
&&
$$
WFO LSX
</pre>
</body>
</html>