summary refs log tree commit diff
diff options
context:
space:
mode:
author Ted Unangst <tedu@tedunangst.com> 2022-04-01 13:03:54 -0400
committer Ted Unangst <tedu@tedunangst.com> 2022-04-01 13:03:54 -0400
commit baa064be79f228610131ddbff360f0b8ebe0fce8 (patch)
tree b5b5b0e028b0db11f47103e07822394a393e4bf7
parent a5667801a3282cc7ca04eea670986a75b824bef2 (diff)
no comments
-rw-r--r-- go.mod 2
-rw-r--r-- scripts/script.go 35
2 files changed, 26 insertions, 11 deletions
diff --git a/go.mod b/go.mod
index 4e5e6b3..c5268e1 100644
--- a/go.mod
+++ b/go.mod
@@ -7,5 +7,3 @@ require (
 	github.com/traefik/yaegi v0.11.2
 	golang.org/x/net v0.0.0-20190415214537-1da14a5a36f2
 )
-
-replace github.com/traefik/yaegi => ../yaegi
diff --git a/scripts/script.go b/scripts/script.go
index 8eb69f0..f70a362 100644
--- a/scripts/script.go
+++ b/scripts/script.go
@@ -1,6 +1,19 @@
 package filt
 
+var bypass = false
+var bypassmagic = "miniweb=bypass"
+
+var startingblob = 0
+var blobarray []interface{}
+
 func Prefilter(req *http.Request) *http.Response {
+	bypass = false
+	blobarray = nil
+
+	if strings.Contains(req.URL.RawQuery, "miniweb=bypass") {
+		bypass = true
+		req.URL.RawQuery = strings.ReplaceAll(req.URL.RawQuery, bypassmagic, "")
+	}
 }
 
 var nofilter = map[string]bool{
@@ -8,29 +21,30 @@ var nofilter = map[string]bool{
 }
 
 func FilterHTML(w io.Writer, req *http.Request, root *html.Node) bool {
-	if req.URL.Path == "/" || nofilter[req.URL.Host] {
+	if bypass || req.URL.Path == "/" || nofilter[req.URL.Host] {
 		return false
 	}
 	if req.URL.Host == "www.washingtonpost.com" {
 		findwapoblob(root)
 	}
+	badstuff := cascadia.MustCompile("div#comments")
+	if bad := badstuff.MatchFirst(root); bad != nil {
+		bad.Parent.RemoveChild(bad)
+	}
+
 	w.Write(prolog)
 	sel := cascadia.MustCompile("article")
 	articles := sel.MatchAll(root)
 	if len(articles) > 0 {
-		w.Write(prolog)
 		for _, a := range articles {
-			clean(w, a, req.URL)
+			cleanit(w, a, req.URL)
 		}
 		return true
 	}
-	clean(w, root, req.URL)
+	cleanit(w, root, req.URL)
 	return true
 }
 
-var startingblob = 0
-var blobarray []interface{}
-
 func findwapoblob(root *html.Node) {
 	script := cascadia.MustCompile("script#__NEXT_DATA__").MatchFirst(root)
 	if script == nil {
@@ -212,6 +226,10 @@ func procscriptnode(w io.Writer, script *html.Node, baseurl *url.URL) {
 	}
 }
 
+func cleanit(w io.Writer, node *html.Node, baseurl *url.URL) {
+	clean(w, node, baseurl)
+}
+
 func clean(w io.Writer, node *html.Node, baseurl *url.URL) {
 	switch node.Type {
 	case html.ElementNode:
@@ -292,7 +310,7 @@ func clean(w io.Writer, node *html.Node, baseurl *url.URL) {
 							src = fmt.Sprintf("https://www.washingtonpost.com/wp-apps/imrs.php?src=%s&w=916", src)
 							alt := m["credits_caption_display"].(string)
 							fmt.Fprintf(w, `<img src="%s"><p>%s`, html.EscapeString(src), html.EscapeString(alt))
-							startingblob = i+1
+							startingblob = i + 1
 							return
 						}
 					}
@@ -318,4 +336,3 @@ func clean(w io.Writer, node *html.Node, baseurl *url.URL) {
 		}
 	}
 }
-