Created
February 2, 2025 14:29
-
-
Save mikeschinkel/aed2676edc29bf7fd04a68587073bcb3 to your computer and use it in GitHub Desktop.
Simplified version of https://gist.github.com/mikeschinkel/67b6c551f3ca1af307071ed7ca541095
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"time"
)
// ReferenceFile is the JSON shape of a reference file as stored on disk.
// Every member is held as a raw string; processReferenceFile validates and
// converts them into the typed fields of Reference.
type ReferenceFile struct {
	// URL is the address of the referenced page (JSON key "url").
	URL string `json:"url"`
	// Rating is a decimal number encoded as a string (JSON key "rating");
	// processReferenceFile parses it with strconv.ParseFloat.
	Rating string `json:"rating"`
	// DateRecorded is a date in YYYY-MM-DD form (JSON key "date_recorded");
	// processReferenceFile parses it with the layout "2006-01-02".
	DateRecorded string `json:"date_recorded"`
}
// Reference is the validated, typed form of a reference: the values read from
// a ReferenceFile combined with data taken from the HTTP response for its URL.
type Reference struct {
	// URL is the parsed address from the reference file.
	URL *url.URL
	// Rating is the numeric rating parsed from the reference file.
	Rating float64
	// DateRecorded is the date parsed from the reference file.
	DateRecorded time.Time
	// LastModified is taken from the response's Last-Modified header.
	LastModified time.Time
	// Title is the page title reported by the HTML parser.
	Title string
	// Content is the page content reported by the HTML parser.
	Content string
	// LastUpdated is not set by processReferenceFile.
	// NOTE(review): presumably maintained by a caller — confirm.
	LastUpdated time.Time
}
// HTML is the result produced by myHTMLParser: the pieces of a page that
// processReferenceFile copies into a Reference.
type HTML struct {
	// Title is the page title; processReferenceFile rejects an empty value.
	Title string
	// Content is the page content; processReferenceFile rejects an empty value.
	Content string
}
func processReferenceFile(filename string) (ref *Reference, err error) { | |
file:=, err = os.Open(filename) ? goto fileErr | |
content := make([]byte, 1024) | |
for { | |
_, err = file.Read(content) ? { | |
if errors.Is(err, io.EOF) { | |
break | |
} | |
goto fileErr | |
} | |
} | |
refFile := ReferenceFile{} | |
err = json.Unmarshal(content, &refFile) ? goto jsonErr | |
ref = &Reference{} | |
if refFile.Rating == "" { | |
goto contentErr | |
} | |
ref.Rating, err = strconv.ParseFloat(refFile.Rating, 64) ? goto parseErr | |
if refFile.DateRecorded == "" { | |
goto contentErr | |
} | |
ref.DateRecorded, err = time.Parse("2006-01-02", refFile.DateRecorded) goto parseErr | |
if refFile.URL == "" { | |
goto contentErr | |
} | |
ref.URL, err = url.Parse(refFile.URL) ? goto parseErr | |
resp :=, err = http.Get(refFile.URL) ? goto httpErr | |
defer resp.Body.Close() | |
mediaType :=, _, err = mime.ParseMediaType(resp.Header.Get("Content-Type")) ? goto parseErr | |
if mediaType == "text/html" { | |
goto contentErr | |
} | |
ref.LastModified, err = time.Parse(time.RFC1123, resp.Header.Get("Last-Modified")) ? goto parseErr | |
html :=, err = myHTMLParser(resp.Body) ? goto parseErr | |
if html.Title == "" { | |
goto contentErr | |
} | |
if html.Content == "" { | |
goto contentErr | |
} | |
ref.Title = html.Title | |
ref.Content = html.Content | |
return ref, nil | |
fileErr: | |
return nil, errors.Join(fmt.Errorf("file error in processFile: %s", filename), err) | |
httpErr: | |
return nil, errors.Join(fmt.Errorf("error in HTTP request processFile: %s", filename), err) | |
contentErr: | |
return nil, errors.Join(fmt.Errorf("error in file content in processFile: %s", filename), err) | |
parseErr: | |
return nil, errors.Join(fmt.Errorf("error parsing file content in processFile: %s", filename), err) | |
jsonErr: | |
return nil, errors.Join(fmt.Errorf("JSON error in processFile: %s", filename), err) | |
} | |
// extractText demonstrates how to parse and extract text from HTML content. | |
// It shows error handling for network requests, HTML parsing, and text extraction. | |
func myHTMLParser(htmlContent io.Reader) (HTML, error) { | |
return HTML{}, nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment