diff options
author | Matt Arnold <matt@thegnuguru.org> | 2023-06-06 14:04:47 -0400 |
---|---|---|
committer | Matt Arnold <matt@thegnuguru.org> | 2023-06-06 14:04:47 -0400 |
commit | 479bd45b11c8a274e2fc00ee3dfbde1f23bc8e5e (patch) | |
tree | 5a828722b60183699d0da0b8112e531a1f6001c1 | |
parent | 2c34a12453ccd364e2ffe7a40580a0ffd2e0b8af (diff) |
fix curl support, add better logging
-rw-r--r-- | data.go | 3 | ||||
-rw-r--r-- | main.go | 102 | ||||
-rw-r--r-- | middleware.go | 30 |
3 files changed, 63 insertions, 72 deletions
diff --git a/data.go b/data.go index 1fd3049..91cd7ab 100644 --- a/data.go +++ b/data.go @@ -1,6 +1,6 @@ package main -var version = "10-CURRENT" +var version = "[GITREV]: Netscape Wizardry " var UserAgents = map[string]string{ "Desktop": "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0", "Googlebot Mobile": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html", @@ -82,4 +82,5 @@ type GenaricArticle struct { Content string Image string Length int + Text string } diff --git a/main.go b/main.go index c692051..acdd635 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,7 @@ import ( "encoding/base64" "encoding/json" "errors" + "fmt" "io" "log" "net/http" @@ -19,7 +20,6 @@ import ( "github.com/flosch/pongo2/v6" readability "github.com/go-shiori/go-readability" - "golang.org/x/time/rate" "piusbird.space/poseidon/nuparser" ) @@ -251,6 +251,7 @@ func fetch(fetchurl string, user_agent string, parser_select bool, original *htt article.Title = raw_article.Title article.Length = raw_article.Length article.Image = raw_article.Image + article.Text = raw_article.TextContent } else { raw_article, err := nuparser.FromReader(&tmp2) if err != nil { @@ -278,6 +279,11 @@ func fetch(fetchurl string, user_agent string, parser_select bool, original *htt if err != nil { return nil, err } + if strings.HasPrefix(original.Header.Get("User-Agent"), "curl") { + prettyBody := fmt.Sprintf("%s By %s\n %s\n ", article.Title, article.Byline, article.Text) + resp.Body = io.NopCloser(strings.NewReader(prettyBody)) + return resp, err + } resp.Body = io.NopCloser(strings.NewReader(out)) return resp, err @@ -316,78 +322,38 @@ func indexHandler(w http.ResponseWriter, r *http.Request) { return } - curl_mode := r.Header.Get("X-BP-Target-UserAgent") - - if curl_mode != "" { - urlparts := strings.SplitN(r.URL.Path[1:], "/", 2) - if !validUserAgent(curl_mode) { - http.Error(w, "Agent not allowed "+curl_mode, http.StatusForbidden) - } - if len(urlparts) < 2 { - return - } + requesterUserAgent := r.Header.Get("User-Agent") - var mozreader = false + urlparts := strings.SplitN(r.URL.Path[1:], "/", 2) + if len(urlparts) < 2 { + return + } - if r.Header.Get("X-BP-MozReader") != "" { - mozreader = true - } - // Confusing part needed to hook up gemini starts here - // Basically we skip validation if it's a gemini uri and - // do our own thing with it + remurl := urlparts[0] + "//" + urlparts[1] + encoded_ua, err := encodeCookie(defaultCookie) + fakeCookie.Value = encoded_ua + if strings.HasPrefix(requesterUserAgent, "curl") { + _, err = validateURL(remurl) - remurl := urlparts[0] + "//" + urlparts[1] - ur, err := url.Parse(remurl) if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) + http.Error(w, err.Error()+" "+remurl, http.StatusTeapot) return } - - log.Println("Honk!") - log.Println(ur.String()) + ur, _ := url.Parse(remurl) if ur.Scheme == "gemini" { - remurl += r.URL.RawQuery - resp, err := gmiFetch(remurl) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - defer resp.Body.Close() - _, err = io.Copy(w, resp.Body) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - } + http.Error(w, "Gemini not supported through curl", http.StatusBadGateway) return } - - _, err = validateURL(remurl) + a, err := fetch(remurl, default_agent, bool(ArcParser), r) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - resp, err := fetch(remurl, curl_mode, mozreader, r) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer resp.Body.Close() - _, err = io.Copy(w, resp.Body) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } + io.Copy(w, a.Body) return } - urlparts := strings.SplitN(r.URL.Path[1:], "/", 2) - if len(urlparts) < 2 { - return - } - - remurl := urlparts[0] + "//" + urlparts[1] - encoded_ua, err := encodeCookie(defaultCookie) - fakeCookie.Value = encoded_ua if err != nil { log.Println(err) @@ -420,7 +386,7 @@ func indexHandler(w http.ResponseWriter, r *http.Request) { _, err = validateURL(remurl) if err != nil { - http.Error(w, err.Error()+" "+remurl, http.StatusInternalServerError) + http.Error(w, err.Error()+" "+remurl, http.StatusTeapot) return } var cookie *http.Cookie @@ -475,23 +441,17 @@ func indexHandler(w http.ResponseWriter, r *http.Request) { } -// Add rate limitin per treehouse -func rateLimitIndex(next func(writer http.ResponseWriter, request *http.Request)) http.HandlerFunc { - limiter := rate.NewLimiter(rate.Limit(rateBurst), rateMax) - return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { - if !limiter.Allow() { - http.Error(writer, "Enhance your calm", 420) - return - } else { - next(writer, request) - } - }) -} func main() { srv := &http.Server{ ReadTimeout: 5 * time.Second, WriteTimeout: 10 * time.Second, } + logfile, err := os.OpenFile("access.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + log.Println("Error opening log") + panic(err) + } + defer logfile.Close() port := os.Getenv("PORT") if port == "" { @@ -504,7 +464,7 @@ func main() { debugmode := os.Getenv("DEBUG") mux.HandleFunc("/redirect", postFormHandler) mux.HandleFunc("/redirect/", postFormHandler) - mux.HandleFunc("/", rateLimitIndex(indexHandler)) + mux.HandleFunc("/", LoggingWrapper(logfile, rateLimitIndex(indexHandler))) if debugmode != "" { @@ -517,7 +477,7 @@ func main() { mux.Handle("/assets/", http.StripPrefix("/assets/", fs)) srv.Handler = mux - err := srv.ListenAndServe() + err = srv.ListenAndServe() if err != nil { panic(err) } diff --git a/middleware.go b/middleware.go new file mode 100644 index 0000000..73725de --- /dev/null +++ b/middleware.go @@ -0,0 +1,30 @@ +package main + +import ( + "net/http" + "os" + "time" + + "golang.org/x/time/rate" +) + +// Add rate limitin per treehouse +func rateLimitIndex(next func(writer http.ResponseWriter, request *http.Request)) http.HandlerFunc { + limiter := rate.NewLimiter(rate.Limit(rateBurst), rateMax) + return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + if !limiter.Allow() { + http.Error(writer, "Enhance your calm", 420) + return + } else { + next(writer, request) + } + }) +} + +func LoggingWrapper(log *os.File, handler http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + handler(w, r) + t := time.Now() + log.WriteString(r.RemoteAddr + " " + t.Format(time.UnixDate) + " " + r.RequestURI + "\n") + } +} |