All checks were successful
ci/woodpecker/push/build-image Pipeline was successful
553 lines
12 KiB
Go
553 lines
12 KiB
Go
package nws
|
|
|
|
import (
|
|
"fmt"
|
|
"html"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
type ForecastDiscussion struct {
|
|
OfficeID string
|
|
OfficeName string
|
|
Product string
|
|
IssuedAt time.Time
|
|
UpdatedAt *time.Time
|
|
|
|
KeyMessages []string
|
|
ShortTerm *ForecastDiscussionSection
|
|
LongTerm *ForecastDiscussionSection
|
|
}
|
|
|
|
// ForecastDiscussionSection is one narrative section (for example SHORT TERM
// or LONG TERM) of a forecast discussion.
type ForecastDiscussionSection struct {
	// Qualifier is whatever follows the "..." on the section header line,
	// with surrounding whitespace removed.
	Qualifier string
	// IssuedAt is the section's own "Issued at ..." time in UTC, or nil when
	// the section does not start with such a line.
	IssuedAt *time.Time
	// Text is the section body: wrapped lines are joined with single spaces
	// and paragraphs are separated by a blank line.
	Text string
}
|
|
|
|
// Patterns shared by the forecast discussion parser; compiled once at package
// initialization.
var (
	// forecastDiscussionHeaderRE matches a recognized section header such as
	// ".SHORT TERM...". Group 1 is the section name, group 2 the rest of the line.
	forecastDiscussionHeaderRE = regexp.MustCompile(`^\.(KEY MESSAGES|SHORT TERM|LONG TERM|AVIATION)\.\.\.(.*)$`)
	// forecastDiscussionAFDRE matches a whole "AFDxxx" line; group 1 is the
	// three-letter office code.
	forecastDiscussionAFDRE = regexp.MustCompile(`^AFD([A-Z]{3})$`)
	// forecastDiscussionWMORE matches a K-prefixed four-letter identifier
	// anywhere in a line; group 1 is the three letters after the K.
	forecastDiscussionWMORE = regexp.MustCompile(`\bK([A-Z]{3})\b`)
	// forecastDiscussionSigRE matches a line made up solely of two to six
	// upper-case letters, which the parser treats as a forecaster signature.
	forecastDiscussionSigRE = regexp.MustCompile(`^[A-Z]{2,6}$`)
)
|
|
|
|
func ParseForecastDiscussionHTML(raw string) (ForecastDiscussion, error) {
|
|
text, err := ExtractForecastDiscussionText(raw)
|
|
if err != nil {
|
|
return ForecastDiscussion{}, err
|
|
}
|
|
|
|
parsed, err := ParseForecastDiscussionText(text)
|
|
if err != nil {
|
|
return ForecastDiscussion{}, err
|
|
}
|
|
|
|
parsed.UpdatedAt = parseForecastDiscussionUpdatedAt(raw)
|
|
return parsed, nil
|
|
}
|
|
|
|
func ExtractForecastDiscussionText(raw string) (string, error) {
|
|
lower := strings.ToLower(raw)
|
|
searchFrom := 0
|
|
for {
|
|
openStart := strings.Index(lower[searchFrom:], "<pre")
|
|
if openStart < 0 {
|
|
return "", fmt.Errorf("missing <pre class=\"glossaryProduct\"> block")
|
|
}
|
|
openStart += searchFrom
|
|
|
|
openEnd := strings.Index(lower[openStart:], ">")
|
|
if openEnd < 0 {
|
|
return "", fmt.Errorf("unterminated <pre> tag")
|
|
}
|
|
openEnd += openStart
|
|
|
|
tag := lower[openStart : openEnd+1]
|
|
if isGlossaryProductTag(tag) {
|
|
closeStart := strings.Index(lower[openEnd+1:], "</pre>")
|
|
if closeStart < 0 {
|
|
return "", fmt.Errorf("missing closing </pre> for glossaryProduct block")
|
|
}
|
|
closeStart += openEnd + 1
|
|
|
|
text := html.UnescapeString(raw[openEnd+1 : closeStart])
|
|
text = strings.ReplaceAll(text, "\r\n", "\n")
|
|
text = strings.ReplaceAll(text, "\r", "\n")
|
|
return text, nil
|
|
}
|
|
|
|
searchFrom = openEnd + 1
|
|
}
|
|
}
|
|
|
|
func ParseForecastDiscussionText(text string) (ForecastDiscussion, error) {
|
|
lines := splitLines(text)
|
|
|
|
officeID := parseForecastDiscussionOfficeID(lines)
|
|
officeName, issuedAt, err := parseForecastDiscussionHeader(lines)
|
|
if err != nil {
|
|
return ForecastDiscussion{}, err
|
|
}
|
|
|
|
out := ForecastDiscussion{
|
|
OfficeID: officeID,
|
|
OfficeName: officeName,
|
|
Product: "afd",
|
|
IssuedAt: issuedAt.UTC(),
|
|
}
|
|
|
|
if block, ok := extractForecastDiscussionSection(lines, "KEY MESSAGES"); ok {
|
|
out.KeyMessages = parseForecastDiscussionKeyMessages(block)
|
|
}
|
|
if block, ok := extractForecastDiscussionSection(lines, "SHORT TERM"); ok {
|
|
section, err := parseForecastDiscussionTextSection(block)
|
|
if err != nil {
|
|
return ForecastDiscussion{}, fmt.Errorf("parse SHORT TERM: %w", err)
|
|
}
|
|
out.ShortTerm = §ion
|
|
}
|
|
if block, ok := extractForecastDiscussionSection(lines, "LONG TERM"); ok {
|
|
section, err := parseForecastDiscussionTextSection(block)
|
|
if err != nil {
|
|
return ForecastDiscussion{}, fmt.Errorf("parse LONG TERM: %w", err)
|
|
}
|
|
out.LongTerm = §ion
|
|
}
|
|
|
|
return out, nil
|
|
}
|
|
|
|
// isGlossaryProductTag reports whether tag (the text of an opening HTML tag)
// has a quoted class attribute listing "glossaryProduct", compared
// case-insensitively.
//
// The class may appear anywhere in the attribute's whitespace-separated list,
// so class="wide glossaryProduct" matches just like class="glossaryProduct".
// Unquoted class values are not recognized.
func isGlossaryProductTag(tag string) bool {
	lower := strings.ToLower(tag)
	for _, quote := range []string{`"`, `'`} {
		opener := "class=" + quote
		rest := lower
		for {
			at := strings.Index(rest, opener)
			if at < 0 {
				break
			}
			value := rest[at+len(opener):]
			end := strings.Index(value, quote)
			if end < 0 {
				// Unterminated value: inspect what is there rather than
				// rejecting the tag outright.
				end = len(value)
			}
			for _, name := range strings.Fields(value[:end]) {
				if name == "glossaryproduct" {
					return true
				}
			}
			rest = value[end:]
		}
	}
	return false
}
|
|
|
|
func parseForecastDiscussionUpdatedAt(raw string) *time.Time {
|
|
lower := strings.ToLower(raw)
|
|
searchFrom := 0
|
|
for {
|
|
metaStart := strings.Index(lower[searchFrom:], "<meta")
|
|
if metaStart < 0 {
|
|
return nil
|
|
}
|
|
metaStart += searchFrom
|
|
|
|
metaEnd := strings.Index(lower[metaStart:], ">")
|
|
if metaEnd < 0 {
|
|
return nil
|
|
}
|
|
metaEnd += metaStart
|
|
|
|
tag := raw[metaStart : metaEnd+1]
|
|
if !strings.EqualFold(strings.TrimSpace(extractHTMLAttr(tag, "name")), "DC.date.created") {
|
|
searchFrom = metaEnd + 1
|
|
continue
|
|
}
|
|
|
|
content := strings.TrimSpace(extractHTMLAttr(tag, "content"))
|
|
if content == "" {
|
|
return nil
|
|
}
|
|
t, err := ParseTime(content)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
tt := t.UTC()
|
|
return &tt
|
|
}
|
|
}
|
|
|
|
func extractHTMLAttr(tag, attr string) string {
|
|
lower := strings.ToLower(tag)
|
|
attrLower := strings.ToLower(attr)
|
|
for i := 0; i < len(lower); i++ {
|
|
idx := strings.Index(lower[i:], attrLower)
|
|
if idx < 0 {
|
|
return ""
|
|
}
|
|
idx += i
|
|
if idx > 0 {
|
|
prev := lower[idx-1]
|
|
if isAttrNameChar(prev) {
|
|
i = idx + len(attrLower)
|
|
continue
|
|
}
|
|
}
|
|
j := idx + len(attrLower)
|
|
for j < len(lower) && isHTMLSpace(lower[j]) {
|
|
j++
|
|
}
|
|
if j >= len(lower) || lower[j] != '=' {
|
|
i = idx + len(attrLower)
|
|
continue
|
|
}
|
|
j++
|
|
for j < len(lower) && isHTMLSpace(lower[j]) {
|
|
j++
|
|
}
|
|
if j >= len(tag) {
|
|
return ""
|
|
}
|
|
quote := tag[j]
|
|
if quote != '"' && quote != '\'' {
|
|
return ""
|
|
}
|
|
j++
|
|
k := j
|
|
for k < len(tag) && tag[k] != quote {
|
|
k++
|
|
}
|
|
if k >= len(tag) {
|
|
return ""
|
|
}
|
|
return html.UnescapeString(tag[j:k])
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isHTMLSpace reports whether b is one of the whitespace bytes skipped around
// an attribute's "=": space, line feed, carriage return, tab or form feed.
func isHTMLSpace(b byte) bool {
	return b == ' ' || b == '\n' || b == '\r' || b == '\t' || b == '\f'
}
|
|
|
|
// isAttrNameChar reports whether b may be part of an attribute name as far as
// this parser is concerned: an ASCII letter or digit, '-', '_' or ':'.
func isAttrNameChar(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	}
	return b == '-' || b == '_' || b == ':'
}
|
|
|
|
// splitLines splits text into lines, treating "\r\n", a lone "\r" and "\n"
// all as line terminators. An empty input yields a single empty line.
func splitLines(text string) []string {
	normalized := strings.ReplaceAll(text, "\r\n", "\n")
	normalized = strings.ReplaceAll(normalized, "\r", "\n")
	return strings.Split(normalized, "\n")
}
|
|
|
|
func parseForecastDiscussionOfficeID(lines []string) string {
|
|
for _, raw := range lines {
|
|
line := strings.TrimSpace(raw)
|
|
if m := forecastDiscussionAFDRE.FindStringSubmatch(line); len(m) == 2 {
|
|
return m[1]
|
|
}
|
|
}
|
|
for _, raw := range lines {
|
|
line := strings.TrimSpace(raw)
|
|
if m := forecastDiscussionWMORE.FindStringSubmatch(line); len(m) == 2 {
|
|
return m[1]
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func parseForecastDiscussionHeader(lines []string) (string, time.Time, error) {
|
|
for i, raw := range lines {
|
|
line := strings.TrimSpace(raw)
|
|
if !strings.HasPrefix(line, "National Weather Service ") {
|
|
continue
|
|
}
|
|
|
|
officeName := line
|
|
for j := i + 1; j < len(lines); j++ {
|
|
tsLine := strings.TrimSpace(lines[j])
|
|
if tsLine == "" {
|
|
continue
|
|
}
|
|
issuedAt, err := parseForecastDiscussionIssueTime(tsLine)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("parse bulletin issuedAt %q: %w", tsLine, err)
|
|
}
|
|
return officeName, issuedAt.UTC(), nil
|
|
}
|
|
|
|
return "", time.Time{}, fmt.Errorf("missing bulletin issue time after office line")
|
|
}
|
|
|
|
return "", time.Time{}, fmt.Errorf("missing office header")
|
|
}
|
|
|
|
func parseForecastDiscussionIssueTime(line string) (time.Time, error) {
|
|
line = strings.TrimSpace(line)
|
|
line = strings.TrimPrefix(line, "Issued at ")
|
|
line = strings.TrimSpace(line)
|
|
|
|
parts := strings.Fields(line)
|
|
if len(parts) != 7 {
|
|
return time.Time{}, fmt.Errorf("unexpected issue time format")
|
|
}
|
|
|
|
loc, err := forecastDiscussionLocation(parts[2])
|
|
if err != nil {
|
|
return time.Time{}, err
|
|
}
|
|
|
|
datePart, err := time.Parse("Mon Jan 2 2006", strings.Join(parts[3:], " "))
|
|
if err != nil {
|
|
return time.Time{}, err
|
|
}
|
|
|
|
hour, minute, err := parseForecastDiscussionClock(parts[0], parts[1])
|
|
if err != nil {
|
|
return time.Time{}, err
|
|
}
|
|
|
|
return time.Date(
|
|
datePart.Year(),
|
|
datePart.Month(),
|
|
datePart.Day(),
|
|
hour,
|
|
minute,
|
|
0,
|
|
0,
|
|
loc,
|
|
), nil
|
|
}
|
|
|
|
// parseForecastDiscussionClock converts a 12-hour clock such as "335" or
// "1215" plus an AM/PM marker into a 24-hour hour and a minute.
//
// rawClock must consist of decimal digits only. One or two digits are read as
// a bare hour; three or more are read as hour followed by two minute digits.
// rawAMPM is matched case-insensitively. An error is returned for an unknown
// meridiem, a non-numeric clock, an hour outside 1-12 or a minute above 59.
func parseForecastDiscussionClock(rawClock, rawAMPM string) (int, int, error) {
	clock := strings.TrimSpace(rawClock)
	ampm := strings.ToUpper(strings.TrimSpace(rawAMPM))
	if ampm != "AM" && ampm != "PM" {
		return 0, 0, fmt.Errorf("unexpected meridiem %q", rawAMPM)
	}

	// strconv.Atoi tolerates a leading sign; a clock never has one, and a sign
	// would also throw off the digit count used below.
	for i := 0; i < len(clock); i++ {
		if clock[i] < '0' || clock[i] > '9' {
			return 0, 0, fmt.Errorf("invalid clock %q", rawClock)
		}
	}
	n, err := strconv.Atoi(clock)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid clock %q", rawClock)
	}

	hour, minute := n, 0
	if len(clock) >= 3 {
		hour, minute = n/100, n%100
	}

	if hour < 1 || hour > 12 {
		return 0, 0, fmt.Errorf("invalid hour %q", rawClock)
	}
	if minute > 59 {
		return 0, 0, fmt.Errorf("invalid minute %q", rawClock)
	}

	// 12 AM is hour 0 and 12 PM is hour 12; every other PM hour gains 12.
	hour %= 12
	if ampm == "PM" {
		hour += 12
	}
	return hour, minute, nil
}
|
|
|
|
// forecastDiscussionLocation maps a time zone abbreviation used in bulletin
// issue times to a fixed-offset location named after the upper-cased
// abbreviation.
//
// Matching ignores case and surrounding whitespace. Besides the Atlantic
// through Hawaii zones and UTC/GMT it accepts ChST (Chamorro Standard Time,
// UTC+10) and SST (Samoa Standard Time, UTC-11). Unknown abbreviations produce
// an error and a nil location.
func forecastDiscussionLocation(abbrev string) (*time.Location, error) {
	abbr := strings.ToUpper(strings.TrimSpace(abbrev))

	// Offsets in whole hours east of UTC.
	var hours int
	switch abbr {
	case "UTC", "GMT":
		hours = 0
	case "ADT":
		hours = -3
	case "AST", "EDT":
		hours = -4
	case "EST", "CDT":
		hours = -5
	case "CST", "MDT":
		hours = -6
	case "MST", "PDT":
		hours = -7
	case "PST", "AKDT":
		hours = -8
	case "AKST":
		hours = -9
	case "HST":
		hours = -10
	case "SST":
		hours = -11
	case "CHST":
		hours = 10
	default:
		return nil, fmt.Errorf("unsupported time zone %q", abbrev)
	}
	return time.FixedZone(abbr, hours*3600), nil
}
|
|
|
|
func extractForecastDiscussionSection(lines []string, section string) ([]string, bool) {
|
|
target := "." + section + "..."
|
|
for i, raw := range lines {
|
|
line := strings.TrimSpace(raw)
|
|
if !strings.HasPrefix(line, target) {
|
|
continue
|
|
}
|
|
|
|
out := []string{line}
|
|
for j := i + 1; j < len(lines); j++ {
|
|
next := strings.TrimSpace(lines[j])
|
|
if next == "&&" || next == "$$" || strings.Contains(next, "WATCHES/WARNINGS/ADVISORIES") {
|
|
break
|
|
}
|
|
if j > i+1 && isForecastDiscussionSectionHeader(next) {
|
|
break
|
|
}
|
|
out = append(out, lines[j])
|
|
}
|
|
return out, true
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
func isForecastDiscussionSectionHeader(line string) bool {
|
|
return forecastDiscussionHeaderRE.MatchString(strings.TrimSpace(line))
|
|
}
|
|
|
|
// parseForecastDiscussionKeyMessages turns a KEY MESSAGES block into one
// string per bullet.
//
// block[0] is the section header and is ignored. A line starting with "-"
// begins a new message; any other non-blank line is appended to the current
// message after a single space. Blank lines are skipped and do not end a
// message. Text that precedes the first bullet becomes a message of its own.
// The result is nil when the block holds no message text.
func parseForecastDiscussionKeyMessages(block []string) []string {
	if len(block) <= 1 {
		return nil
	}

	var (
		messages []string
		current  strings.Builder
	)
	// flush stores the message collected so far, if any, and starts a new one.
	flush := func() {
		if msg := strings.TrimSpace(current.String()); msg != "" {
			messages = append(messages, msg)
		}
		current.Reset()
	}

	// Blank lines are skipped below, so the body needs no separate trimming.
	for _, raw := range block[1:] {
		line := strings.TrimSpace(raw)
		switch {
		case line == "":
			continue
		case strings.HasPrefix(line, "-"):
			flush()
			current.WriteString(strings.TrimSpace(strings.TrimPrefix(line, "-")))
		default:
			if current.Len() > 0 {
				current.WriteByte(' ')
			}
			current.WriteString(line)
		}
	}
	flush()

	return messages
}
|
|
|
|
func parseForecastDiscussionTextSection(block []string) (ForecastDiscussionSection, error) {
|
|
if len(block) == 0 {
|
|
return ForecastDiscussionSection{}, fmt.Errorf("empty section")
|
|
}
|
|
|
|
section := ForecastDiscussionSection{
|
|
Qualifier: parseForecastDiscussionQualifier(strings.TrimSpace(block[0])),
|
|
}
|
|
|
|
body := trimBlankLines(block[1:])
|
|
if len(body) == 0 {
|
|
return section, nil
|
|
}
|
|
|
|
first := strings.TrimSpace(body[0])
|
|
if strings.HasPrefix(first, "Issued at ") {
|
|
issuedAt, err := parseForecastDiscussionIssueTime(first)
|
|
if err != nil {
|
|
return ForecastDiscussionSection{}, fmt.Errorf("parse section issuedAt %q: %w", first, err)
|
|
}
|
|
tt := issuedAt.UTC()
|
|
section.IssuedAt = &tt
|
|
body = trimBlankLines(body[1:])
|
|
}
|
|
|
|
body = trimForecastDiscussionSignatureLines(body)
|
|
section.Text = joinForecastDiscussionParagraphs(body)
|
|
return section, nil
|
|
}
|
|
|
|
func parseForecastDiscussionQualifier(header string) string {
|
|
m := forecastDiscussionHeaderRE.FindStringSubmatch(header)
|
|
if len(m) != 3 {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(m[2])
|
|
}
|
|
|
|
// trimBlankLines returns lines without its leading and trailing
// whitespace-only entries. Blank lines in the interior are kept. The result is
// a sub-slice of lines, not a copy.
func trimBlankLines(lines []string) []string {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	start, end := 0, len(lines)
	for start < end && isBlank(lines[start]) {
		start++
	}
	for end > start && isBlank(lines[end-1]) {
		end--
	}
	return lines[start:end]
}
|
|
|
|
func trimForecastDiscussionSignatureLines(lines []string) []string {
|
|
lines = trimBlankLines(lines)
|
|
for len(lines) > 0 {
|
|
last := strings.TrimSpace(lines[len(lines)-1])
|
|
if last == "" {
|
|
lines = lines[:len(lines)-1]
|
|
continue
|
|
}
|
|
if forecastDiscussionSigRE.MatchString(last) {
|
|
lines = trimBlankLines(lines[:len(lines)-1])
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
return lines
|
|
}
|
|
|
|
// joinForecastDiscussionParagraphs reflows hard-wrapped lines into paragraphs.
//
// Consecutive non-blank lines are trimmed and joined with single spaces; each
// run of blank lines marks a paragraph break. Paragraphs are joined with a
// blank line ("\n\n"). Input with no text yields "".
func joinForecastDiscussionParagraphs(lines []string) string {
	var paragraphs []string
	current := make([]string, 0, len(lines))

	// flush closes the paragraph being collected. It does nothing when the
	// paragraph is empty, so leading, trailing and repeated blank lines need
	// no special handling.
	flush := func() {
		if len(current) == 0 {
			return
		}
		paragraphs = append(paragraphs, strings.Join(current, " "))
		current = current[:0]
	}

	for _, raw := range lines {
		if line := strings.TrimSpace(raw); line != "" {
			current = append(current, line)
			continue
		}
		flush()
	}
	flush()

	return strings.Join(paragraphs, "\n\n")
}
|