summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Arnold <matt@thegnuguru.org> 2023-06-06 14:04:47 -0400
committer Matt Arnold <matt@thegnuguru.org> 2023-06-06 14:04:47 -0400
commit 479bd45b11c8a274e2fc00ee3dfbde1f23bc8e5e (patch)
tree 5a828722b60183699d0da0b8112e531a1f6001c1
parent 2c34a12453ccd364e2ffe7a40580a0ffd2e0b8af (diff)
fix curl support, add better logging
-rw-r--r-- data.go 3
-rw-r--r-- main.go 102
-rw-r--r-- middleware.go 30
3 files changed, 63 insertions, 72 deletions
diff --git a/data.go b/data.go
index 1fd3049..91cd7ab 100644
--- a/data.go
+++ b/data.go
@@ -1,6 +1,6 @@
 package main
 
-var version = "10-CURRENT"
+var version = "[GITREV]: Netscape Wizardry "
 var UserAgents = map[string]string{
 	"Desktop":          "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
 	"Googlebot Mobile": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html",
@@ -82,4 +82,5 @@ type GenaricArticle struct {
 	Content string
 	Image   string
 	Length  int
+	Text    string
 }
diff --git a/main.go b/main.go
index c692051..acdd635 100644
--- a/main.go
+++ b/main.go
@@ -6,6 +6,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"errors"
+	"fmt"
 	"io"
 	"log"
 	"net/http"
@@ -19,7 +20,6 @@ import (
 	"github.com/flosch/pongo2/v6"
 	readability "github.com/go-shiori/go-readability"
 
-	"golang.org/x/time/rate"
 	"piusbird.space/poseidon/nuparser"
 )
 
@@ -251,6 +251,7 @@ func fetch(fetchurl string, user_agent string, parser_select bool, original *htt
 		article.Title = raw_article.Title
 		article.Length = raw_article.Length
 		article.Image = raw_article.Image
+		article.Text = raw_article.TextContent
 	} else {
 		raw_article, err := nuparser.FromReader(&tmp2)
 		if err != nil {
@@ -278,6 +279,11 @@ func fetch(fetchurl string, user_agent string, parser_select bool, original *htt
 	if err != nil {
 		return nil, err
 	}
+	if strings.HasPrefix(original.Header.Get("User-Agent"), "curl") {
+		prettyBody := fmt.Sprintf("%s By %s\n %s\n ", article.Title, article.Byline, article.Text)
+		resp.Body = io.NopCloser(strings.NewReader(prettyBody))
+		return resp, err
+	}
 	resp.Body = io.NopCloser(strings.NewReader(out))
 
 	return resp, err
@@ -316,78 +322,38 @@ func indexHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	curl_mode := r.Header.Get("X-BP-Target-UserAgent")
-
-	if curl_mode != "" {
-		urlparts := strings.SplitN(r.URL.Path[1:], "/", 2)
-		if !validUserAgent(curl_mode) {
-			http.Error(w, "Agent not allowed "+curl_mode, http.StatusForbidden)
-		}
-		if len(urlparts) < 2 {
-			return
-		}
+	requesterUserAgent := r.Header.Get("User-Agent")
 
-		var mozreader = false
+	urlparts := strings.SplitN(r.URL.Path[1:], "/", 2)
+	if len(urlparts) < 2 {
+		return
+	}
 
-		if r.Header.Get("X-BP-MozReader") != "" {
-			mozreader = true
-		}
-		// Confusing part needed to hook up gemini starts here
-		// Basically we skip validation if it's a gemini uri and
-		// do our own thing with it
+	remurl := urlparts[0] + "//" + urlparts[1]
+	encoded_ua, err := encodeCookie(defaultCookie)
+	fakeCookie.Value = encoded_ua
+	if strings.HasPrefix(requesterUserAgent, "curl") {
+		_, err = validateURL(remurl)
 
-		remurl := urlparts[0] + "//" + urlparts[1]
-		ur, err := url.Parse(remurl)
 		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
+			http.Error(w, err.Error()+" "+remurl, http.StatusTeapot)
 			return
 		}
-
-		log.Println("Honk!")
-		log.Println(ur.String())
+		ur, _ := url.Parse(remurl)
 		if ur.Scheme == "gemini" {
-			remurl += r.URL.RawQuery
-			resp, err := gmiFetch(remurl)
-			if err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			defer resp.Body.Close()
-			_, err = io.Copy(w, resp.Body)
-			if err != nil {
-				http.Error(w, err.Error(), http.StatusInternalServerError)
-			}
+			http.Error(w, "Gemini not supported through curl", http.StatusBadGateway)
 			return
 		}
-
-		_, err = validateURL(remurl)
+		a, err := fetch(remurl, default_agent, bool(ArcParser), r)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
 
-		resp, err := fetch(remurl, curl_mode, mozreader, r)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		defer resp.Body.Close()
-		_, err = io.Copy(w, resp.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
+		io.Copy(w, a.Body)
 		return
 
 	}
-	urlparts := strings.SplitN(r.URL.Path[1:], "/", 2)
-	if len(urlparts) < 2 {
-		return
-	}
-
-	remurl := urlparts[0] + "//" + urlparts[1]
-	encoded_ua, err := encodeCookie(defaultCookie)
-	fakeCookie.Value = encoded_ua
 
 	if err != nil {
 		log.Println(err)
@@ -420,7 +386,7 @@ func indexHandler(w http.ResponseWriter, r *http.Request) {
 
 	_, err = validateURL(remurl)
 	if err != nil {
-		http.Error(w, err.Error()+" "+remurl, http.StatusInternalServerError)
+		http.Error(w, err.Error()+" "+remurl, http.StatusTeapot)
 		return
 	}
 	var cookie *http.Cookie
@@ -475,23 +441,17 @@ func indexHandler(w http.ResponseWriter, r *http.Request) {
 
 }
 
-// Add rate limitin per treehouse
-func rateLimitIndex(next func(writer http.ResponseWriter, request *http.Request)) http.HandlerFunc {
-	limiter := rate.NewLimiter(rate.Limit(rateBurst), rateMax)
-	return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
-		if !limiter.Allow() {
-			http.Error(writer, "Enhance your calm", 420)
-			return
-		} else {
-			next(writer, request)
-		}
-	})
-}
 func main() {
 	srv := &http.Server{
 		ReadTimeout:  5 * time.Second,
 		WriteTimeout: 10 * time.Second,
 	}
+	logfile, err := os.OpenFile("access.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		log.Println("Error opening log")
+		panic(err)
+	}
+	defer logfile.Close()
 
 	port := os.Getenv("PORT")
 	if port == "" {
@@ -504,7 +464,7 @@ func main() {
 	debugmode := os.Getenv("DEBUG")
 	mux.HandleFunc("/redirect", postFormHandler)
 	mux.HandleFunc("/redirect/", postFormHandler)
-	mux.HandleFunc("/", rateLimitIndex(indexHandler))
+	mux.HandleFunc("/", LoggingWrapper(logfile, rateLimitIndex(indexHandler)))
 
 	if debugmode != "" {
 
@@ -517,7 +477,7 @@ func main() {
 	mux.Handle("/assets/", http.StripPrefix("/assets/", fs))
 	srv.Handler = mux
 
-	err := srv.ListenAndServe()
+	err = srv.ListenAndServe()
 	if err != nil {
 		panic(err)
 	}
diff --git a/middleware.go b/middleware.go
new file mode 100644
index 0000000..73725de
--- /dev/null
+++ b/middleware.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"net/http"
+	"os"
+	"time"
+
+	"golang.org/x/time/rate"
+)
+
+// rateLimitIndex wraps next behind a single limiter shared by all requests; a request the limiter rejects gets "Enhance your calm" with status 420 and never reaches next.
+func rateLimitIndex(next func(writer http.ResponseWriter, request *http.Request)) http.HandlerFunc {
+	limiter := rate.NewLimiter(rate.Limit(rateBurst), rateMax) // NOTE(review): rate.NewLimiter takes (rate, burst), so rateBurst is used as the rate and rateMax as the burst — confirm the names are not swapped
+	return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
+		if !limiter.Allow() {
+			http.Error(writer, "Enhance your calm", 420) // 420 is a non-standard status; the registered code for rate limiting is 429 (http.StatusTooManyRequests)
+			return
+		} else {
+			next(writer, request)
+		}
+	})
+}
+
+func LoggingWrapper(log *os.File, handler http.HandlerFunc) http.HandlerFunc { // LoggingWrapper returns a handler that runs handler and then appends one line per request to log (the *os.File parameter, not the standard log package).
+	return func(w http.ResponseWriter, r *http.Request) {
+		handler(w, r) // the wrapped handler runs first, so the entry below is written only after it returns
+		t := time.Now()
+		log.WriteString(r.RemoteAddr + " " + t.Format(time.UnixDate) + " " + r.RequestURI + "\n") // line format: "<remote addr> <time in UnixDate layout> <request URI>"; the WriteString result (byte count and error) is discarded
+	}
+}