Skip to content
This repository was archived by the owner on Jun 22, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions internal/service/http_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package service

import (
"io"
"net/http"
"strings"
"testing"
)

// roundTripperFunc adapts an ordinary function to the http.RoundTripper
// interface so a test can stub a transport with a closure.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// Compile-time check that the adapter satisfies http.RoundTripper.
var _ http.RoundTripper = roundTripperFunc(nil)

// RoundTrip invokes the wrapped function with r and returns its result
// unchanged.
func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	resp, err := f(r)
	return resp, err
}

// TestUserAgentTransportSetsConfiguredUserAgent verifies that a request built
// without a User-Agent header reaches the base transport carrying the value
// configured on userAgentTransport.
func TestUserAgentTransportSetsConfiguredUserAgent(t *testing.T) {
	// Stub base transport: checks the header it receives, then answers 200 "ok".
	stub := func(r *http.Request) (*http.Response, error) {
		ua := r.Header.Get("User-Agent")
		if ua != "feedreader/0.1" {
			t.Fatalf("unexpected user-agent: %q", ua)
		}
		reply := &http.Response{
			StatusCode: http.StatusOK,
			Header:     make(http.Header),
			Body:       io.NopCloser(strings.NewReader("ok")),
		}
		return reply, nil
	}
	rt := &userAgentTransport{
		userAgent: "feedreader/0.1",
		base:      roundTripperFunc(stub),
	}

	request, reqErr := http.NewRequest(http.MethodGet, "https://example.com", nil)
	if reqErr != nil {
		t.Fatalf("new request: %v", reqErr)
	}

	response, rtErr := rt.RoundTrip(request)
	if rtErr != nil {
		t.Fatalf("round trip: %v", rtErr)
	}
	// The body is an in-memory reader, so the Close error carries no information.
	_ = response.Body.Close()
}

// TestUserAgentTransportPreservesExplicitUserAgent verifies that a User-Agent
// header already present on the request is what the base transport sees, even
// though userAgentTransport is configured with a different value.
func TestUserAgentTransportPreservesExplicitUserAgent(t *testing.T) {
	// Stub base transport: expects the caller's header, then answers 200 "ok".
	stub := func(r *http.Request) (*http.Response, error) {
		ua := r.Header.Get("User-Agent")
		if ua != "custom-agent/1.0" {
			t.Fatalf("unexpected user-agent: %q", ua)
		}
		reply := &http.Response{
			StatusCode: http.StatusOK,
			Header:     make(http.Header),
			Body:       io.NopCloser(strings.NewReader("ok")),
		}
		return reply, nil
	}
	rt := &userAgentTransport{
		userAgent: "feedreader/0.1",
		base:      roundTripperFunc(stub),
	}

	request, reqErr := http.NewRequest(http.MethodGet, "https://example.com", nil)
	if reqErr != nil {
		t.Fatalf("new request: %v", reqErr)
	}
	// The explicit header must win over the transport's configured default.
	request.Header.Set("User-Agent", "custom-agent/1.0")

	response, rtErr := rt.RoundTrip(request)
	if rtErr != nil {
		t.Fatalf("round trip: %v", rtErr)
	}
	// The body is an in-memory reader, so the Close error carries no information.
	_ = response.Body.Close()
}
31 changes: 29 additions & 2 deletions internal/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,39 @@ func New(cfg config.Config, repo *repository.SQLiteRepository) *FeedService {
cfg: cfg,
repo: repo,
sources: sources.Build(),
client: &http.Client{
Timeout: time.Duration(cfg.RequestTimeoutSec * float64(time.Second)),
client: newHTTPClient(cfg),
}
}

// newHTTPClient builds an *http.Client from cfg. The client timeout is
// cfg.RequestTimeoutSec interpreted as (possibly fractional) seconds, and
// requests go through a userAgentTransport that wraps http.DefaultTransport
// and carries the whitespace-trimmed cfg.UserAgent.
func newHTTPClient(cfg config.Config) *http.Client {
	timeout := time.Duration(cfg.RequestTimeoutSec * float64(time.Second))
	transport := &userAgentTransport{
		base:      http.DefaultTransport,
		userAgent: strings.TrimSpace(cfg.UserAgent),
	}
	client := &http.Client{
		Timeout:   timeout,
		Transport: transport,
	}
	return client
}

// userAgentTransport is an http.RoundTripper that supplies a default
// User-Agent header on requests that do not already carry one.
type userAgentTransport struct {
	// base performs the actual request; nil falls back to http.DefaultTransport.
	base http.RoundTripper
	// userAgent is the default header value; a blank value disables injection.
	userAgent string
}

// RoundTrip forwards req to the base transport. When a non-blank user agent is
// configured and req has no User-Agent header, the header is set on a copy of
// the request; the caller's request is never modified. Requests that already
// carry a User-Agent, or transports with a blank user agent, pass req through
// untouched. The response and error are those of the base transport.
func (t *userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	transport := t.base
	if transport == nil {
		transport = http.DefaultTransport
	}

	// Trim once so the value that is checked is also the value that is sent.
	userAgent := strings.TrimSpace(t.userAgent)
	if userAgent == "" || req.Header.Get("User-Agent") != "" {
		return transport.RoundTrip(req)
	}

	// Request.Clone deep-copies the header map, so no second copy is needed.
	clone := req.Clone(req.Context())
	if clone.Header == nil {
		// A hand-built request may have a nil Header; Set on a nil map panics.
		clone.Header = make(http.Header)
	}
	clone.Header.Set("User-Agent", userAgent)
	return transport.RoundTrip(clone)
}

func (s *FeedService) StartScheduler(ctx context.Context) {
go func() {
location := loadScheduleLocation()
Expand Down
6 changes: 1 addition & 5 deletions internal/sources/alphaxiv.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@ func (AlphaXivSource) Label() string { return "alphaXiv" }
// HomePageURL returns the address of the alphaXiv home page.
func (AlphaXivSource) HomePageURL() string {
	const homePage = "https://www.alphaxiv.org/"
	return homePage
}

func (s AlphaXivSource) Fetch(ctx context.Context, client *http.Client) ([]domain.FeedItem, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.HomePageURL(), nil)
if err != nil {
return nil, err
}
resp, err := client.Do(req)
resp, err := getWithRetry(ctx, client, s.HomePageURL())
if err != nil {
return nil, err
}
Expand Down
6 changes: 1 addition & 5 deletions internal/sources/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@ func (GitHubTrendingSource) Label() string { return "GitHub Trending" }
// HomePageURL returns the address of the GitHub Trending page.
func (GitHubTrendingSource) HomePageURL() string {
	const homePage = "https://github.com/trending"
	return homePage
}

func (s GitHubTrendingSource) Fetch(ctx context.Context, client *http.Client) ([]domain.FeedItem, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.HomePageURL(), nil)
if err != nil {
return nil, err
}
resp, err := client.Do(req)
resp, err := getWithRetry(ctx, client, s.HomePageURL())
if err != nil {
return nil, err
}
Expand Down
141 changes: 50 additions & 91 deletions internal/sources/hackernews.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package sources

import (
"context"
"encoding/xml"
"encoding/json"
"html"
"io"
"net/http"
Expand All @@ -14,18 +14,16 @@ import (
"feedreader/internal/domain"
)

// hackerNewsFrontPageAPI is the hn.algolia.com search endpoint, filtered to
// the front_page tag, that HackerNewsSource.Fetch requests.
const hackerNewsFrontPageAPI = "https://hn.algolia.com/api/v1/search?tags=front_page"

// HackerNewsSource is the Hacker News feed source. It holds no state.
type HackerNewsSource struct{}

// Key returns the source key "hackernews".
func (HackerNewsSource) Key() string {
	return "hackernews"
}

// Label returns the display name "Hacker News".
func (HackerNewsSource) Label() string {
	return "Hacker News"
}

// HomePageURL returns the address of the Hacker News home page.
func (HackerNewsSource) HomePageURL() string {
	return "https://news.ycombinator.com/"
}

func (s HackerNewsSource) Fetch(ctx context.Context, client *http.Client) ([]domain.FeedItem, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://hnrss.org/frontpage", nil)
if err != nil {
return nil, err
}
resp, err := client.Do(req)
resp, err := getWithRetry(ctx, client, hackerNewsFrontPageAPI)
if err != nil {
return nil, err
}
Expand All @@ -41,115 +39,76 @@ func (s HackerNewsSource) Fetch(ctx context.Context, client *http.Client) ([]dom
return parseHackerNews(body)
}

type hnRSS struct {
Channel struct {
Items []hnItem `xml:"item"`
} `xml:"channel"`
type hnFrontPage struct {
Hits []hnStory `json:"hits"`
}

type hnItem struct {
Title string `xml:"title"`
Description string `xml:"description"`
PubDate string `xml:"pubDate"`
Link string `xml:"link"`
Comments string `xml:"comments"`
Guid string `xml:"guid"`
Creator string `xml:"creator"`
type hnStory struct {
ObjectID string `json:"objectID"`
StoryID int `json:"story_id"`
Title string `json:"title"`
StoryTitle string `json:"story_title"`
URL string `json:"url"`
StoryURL string `json:"story_url"`
StoryText string `json:"story_text"`
CommentText string `json:"comment_text"`
Author string `json:"author"`
Points *int `json:"points"`
NumComments *int `json:"num_comments"`
CreatedAt string `json:"created_at"`
}

func parseHackerNews(payload []byte) ([]domain.FeedItem, error) {
var rss hnRSS
if err := xml.Unmarshal(payload, &rss); err != nil {
var rss hnFrontPage
if err := json.Unmarshal(payload, &rss); err != nil {
return nil, err
}
items := make([]domain.FeedItem, 0, len(rss.Channel.Items))
for idx, node := range rss.Channel.Items {
var publishedAt *time.Time
if node.PubDate != "" {
if parsed, err := time.Parse(time.RFC1123Z, node.PubDate); err == nil {
t := parsed.UTC()
publishedAt = &t
}
items := make([]domain.FeedItem, 0, len(rss.Hits))
for idx, node := range rss.Hits {
externalID := strings.TrimSpace(node.ObjectID)
if externalID == "" && node.StoryID > 0 {
externalID = strconv.Itoa(node.StoryID)
}
score := extractInt(node.Description, `Points:\s*(\d+)`)
commentsCount := extractInt(node.Description, `# Comments:\s*(\d+)`)
if externalID == "" {
continue
}
commentsURL := "https://news.ycombinator.com/item?id=" + externalID
metadata := map[string]any{}
if commentsCount != nil {
metadata["comments_count"] = *commentsCount
if node.NumComments != nil {
metadata["comments_count"] = *node.NumComments
}
items = append(items, domain.FeedItem{
Source: "hackernews",
ExternalID: extractStoryID(firstNonEmpty(node.Comments, node.Guid, node.Link)),
Title: strings.TrimSpace(node.Title),
URL: strings.TrimSpace(node.Link),
Summary: cleanString(extractHNSummary(node.Description)),
Author: cleanString(strings.TrimSpace(node.Creator)),
Score: score,
CommentsURL: cleanString(strings.TrimSpace(node.Comments)),
PublishedAt: publishedAt,
ExternalID: externalID,
Title: strings.TrimSpace(firstNonEmpty(node.Title, node.StoryTitle, externalID)),
URL: strings.TrimSpace(firstNonEmpty(node.URL, node.StoryURL, commentsURL)),
Summary: cleanString(extractHNSummary(firstNonEmpty(node.StoryText, node.CommentText))),
Author: cleanString(strings.TrimSpace(node.Author)),
Score: node.Points,
CommentsURL: cleanString(commentsURL),
PublishedAt: parseHackerNewsTime(node.CreatedAt),
SourceRank: idx + 1,
Metadata: metadata,
})
}
return items, nil
}

// storyIDPattern captures the digits that follow "id=" in a string.
// It is compiled once at package scope instead of on every call.
var storyIDPattern = regexp.MustCompile(`id=(\d+)`)

// extractStoryID returns the digits following the first "id=" found in value.
// When value contains no such sequence, it returns value with surrounding
// whitespace trimmed.
func extractStoryID(value string) string {
	if match := storyIDPattern.FindStringSubmatch(value); len(match) == 2 {
		return match[1]
	}
	return strings.TrimSpace(value)
}

// Patterns used by extractHNSummary, compiled once at package scope instead
// of on every call.
var (
	// hnRSSMarkupPattern matches HTML tags.
	hnRSSMarkupPattern = regexp.MustCompile(`<a [^>]+>|</a>|<[^>]+>`)
	// hnRSSNoisePatterns are applied in order; the whitespace collapse must
	// stay last so the gaps left by earlier replacements are merged.
	hnRSSNoisePatterns = []*regexp.Regexp{
		regexp.MustCompile(`Comments URL:\s*\S+`),
		regexp.MustCompile(`Article URL:\s*\S+`),
		regexp.MustCompile(`Points:\s*\d+`),
		regexp.MustCompile(`# Comments:\s*\d+`),
		regexp.MustCompile(`\s+`),
	}
)

// extractHNSummary reduces an item description to plain text. It keeps only
// the part before the first "<hr>", replaces HTML tags with spaces, unescapes
// HTML entities, removes the "Comments URL:", "Article URL:", "Points:" and
// "# Comments:" fields, collapses whitespace runs, and trims the result.
func extractHNSummary(description string) string {
	head := strings.SplitN(description, "<hr>", 2)[0]
	cleaned := hnRSSMarkupPattern.ReplaceAllString(head, " ")
	cleaned = html.UnescapeString(cleaned)
	for _, pattern := range hnRSSNoisePatterns {
		cleaned = pattern.ReplaceAllString(cleaned, " ")
	}
	return strings.TrimSpace(cleaned)
}

func extractInt(value, pattern string) *int {
re := regexp.MustCompile(pattern)
match := re.FindStringSubmatch(value)
if len(match) != 2 {
return nil
}
parsed := strings.ReplaceAll(match[1], ",", "")
if parsed == "" {
func parseHackerNewsTime(value string) *time.Time {
if strings.TrimSpace(value) == "" {
return nil
}
out, err := strconv.Atoi(parsed)
parsed, err := time.Parse(time.RFC3339, strings.TrimSpace(value))
if err != nil {
return nil
}
return &out
utc := parsed.UTC()
return &utc
}

// firstNonEmpty returns the first argument that still has content after
// whitespace trimming. The argument is returned as given, not trimmed.
// It returns "" when every argument is blank or none are supplied.
func firstNonEmpty(values ...string) string {
	for i := range values {
		candidate := values[i]
		if len(strings.TrimSpace(candidate)) == 0 {
			continue
		}
		return candidate
	}
	return ""
}

func cleanString(value string) *string {
value = strings.TrimSpace(value)
if value == "" {
return nil
}
return &value
// Patterns used by extractHNSummary, compiled once at package scope instead
// of on every call.
var (
	// hnMarkupPattern matches HTML tags.
	hnMarkupPattern = regexp.MustCompile(`<a [^>]+>|</a>|<[^>]+>`)
	// hnWhitespacePattern matches runs of whitespace.
	hnWhitespacePattern = regexp.MustCompile(`\s+`)
)

// extractHNSummary reduces an HTML fragment to plain text: tags are replaced
// with spaces, HTML entities are unescaped, whitespace runs are collapsed to
// a single space, and the result is trimmed. Tags are stripped before
// unescaping, so escaped markup such as "&lt;b&gt;" survives as literal text.
func extractHNSummary(description string) string {
	cleaned := hnMarkupPattern.ReplaceAllString(description, " ")
	cleaned = html.UnescapeString(cleaned)
	cleaned = hnWhitespacePattern.ReplaceAllString(cleaned, " ")
	return strings.TrimSpace(cleaned)
}
Loading
Loading