webring/internal/favicon/favicon.go

213 lines
5.2 KiB
Go

package favicon
import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
const (
htmlTimeout = 5 * time.Second
dlTimeout = 10 * time.Second
)
func GetAndStoreFavicon(siteURL, mediaFolder string, siteID int) (string, error) {
baseURL, err := url.Parse(siteURL)
if err != nil {
return "", err
}
faviconURL, err := getFaviconFromHTML(baseURL)
if err == nil {
faviconPath, err := downloadFavicon(faviconURL, baseURL, mediaFolder, siteID)
if err == nil {
return faviconPath, nil
}
log.Printf("Failed to download favicon from HTML link: %v", err)
}
commonFaviconNames := []string{
"favicon.ico",
"favicon.png",
"favicon.jpg",
"favicon.svg",
"favicon.gif",
"apple-touch-icon.png",
"apple-touch-icon-precomposed.png",
}
for _, name := range commonFaviconNames {
faviconURL := baseURL.ResolveReference(&url.URL{Path: name})
faviconPath, err := downloadFavicon(faviconURL, baseURL, mediaFolder, siteID)
if err == nil {
return faviconPath, nil
}
log.Printf("Failed to download %s: %v", name, err)
}
return "", errors.New("failed to find and download favicon")
}
func getFaviconFromHTML(baseURL *url.URL) (*url.URL, error) {
ctx, cancel := context.WithTimeout(context.Background(), htmlTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", baseURL.String(), http.NoBody)
if err != nil {
return nil, err
}
userAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
req.Header.Set("User-Agent", userAgent)
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Upgrade-Insecure-Requests", "1")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer func() {
if cerr := resp.Body.Close(); cerr != nil {
log.Printf("Failed to close response body: %v", cerr)
}
}()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to fetch HTML: status code %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
}
var faviconURL string
var exists bool
doc.Find("link[rel='icon'], link[rel='shortcut icon']").EachWithBreak(func(_ int, s *goquery.Selection) bool {
faviconURL, exists = s.Attr("href")
return !exists
})
if !exists {
log.Printf("No favicon link found for site: %s", baseURL.String())
return nil, errors.New("favicon not found in HTML")
}
parsedFaviconURL, err := url.Parse(faviconURL)
if err != nil {
return nil, err
}
if !parsedFaviconURL.IsAbs() {
parsedFaviconURL = baseURL.ResolveReference(parsedFaviconURL)
}
return parsedFaviconURL, nil
}
func safeJoinUnder(base, name string) (string, error) {
baseAbs, err := filepath.Abs(base)
if err != nil {
return "", err
}
candAbs, err := filepath.Abs(filepath.Join(base, name))
if err != nil {
return "", err
}
if candAbs != baseAbs && !strings.HasPrefix(candAbs, baseAbs+string(os.PathSeparator)) {
return "", fmt.Errorf("invalid path: %s", candAbs)
}
return candAbs, nil
}
func downloadFavicon(faviconURL, baseURL *url.URL, mediaFolder string, siteID int) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), dlTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", faviconURL.String(), http.NoBody)
if err != nil {
return "", err
}
ua := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/91.0.4472.124 Safari/537.36"
req.Header.Set("User-Agent", ua)
req.Header.Set("Accept", "image/webp,image/apng,image/*,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Referer", baseURL.String())
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer func() {
if cerr := resp.Body.Close(); cerr != nil {
log.Printf("Failed to close response body: %v", cerr)
}
}()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("failed to download favicon: status code %d", resp.StatusCode)
}
hasher := sha256.New()
if _, hashErr := fmt.Fprintf(hasher, "%d-%s", siteID, faviconURL); hashErr != nil {
return "", hashErr
}
hash := hex.EncodeToString(hasher.Sum(nil))
ext := filepath.Ext(faviconURL.Path)
if ext == "" {
ext = ".ico"
}
fileName := fmt.Sprintf("favicon-%d-%s%s", siteID, hash[:8], ext)
absPath, err := safeJoinUnder(mediaFolder, fileName)
if err != nil {
return "", err
}
if mkErr := os.MkdirAll(filepath.Dir(absPath), 0o750); mkErr != nil {
return "", mkErr
}
out, err := os.OpenFile(absPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600) // #nosec G304
if err != nil {
return "", err
}
defer func() {
if cerr := out.Close(); cerr != nil {
log.Printf("Failed to close file: %v", cerr)
}
}()
if _, err = io.Copy(out, resp.Body); err != nil {
if rmErr := os.Remove(absPath); rmErr != nil {
log.Printf("Failed to remove partial file %q: %v", absPath, rmErr)
}
return "", err
}
return fileName, nil
}