diff options
author | Ted Unangst <tedu@tedunangst.com> | 2022-04-01 13:03:54 -0400 |
---|---|---|
committer | Ted Unangst <tedu@tedunangst.com> | 2022-04-01 13:03:54 -0400 |
commit | baa064be79f228610131ddbff360f0b8ebe0fce8 (patch) | |
tree | b5b5b0e028b0db11f47103e07822394a393e4bf7 | |
parent | a5667801a3282cc7ca04eea670986a75b824bef2 (diff) |
no comments
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | scripts/script.go | 35 |
2 files changed, 26 insertions, 11 deletions
diff --git a/go.mod b/go.mod index 4e5e6b3..c5268e1 100644 --- a/go.mod +++ b/go.mod @@ -7,5 +7,3 @@ require ( github.com/traefik/yaegi v0.11.2 golang.org/x/net v0.0.0-20190415214537-1da14a5a36f2 ) - -replace github.com/traefik/yaegi => ../yaegi diff --git a/scripts/script.go b/scripts/script.go index 8eb69f0..f70a362 100644 --- a/scripts/script.go +++ b/scripts/script.go @@ -1,6 +1,19 @@ package filt +var bypass = false +var bypassmagic = "miniweb=bypass" + +var startingblob = 0 +var blobarray []interface{} + func Prefilter(req *http.Request) *http.Response { + bypass = false + blobarray = nil + + if strings.Contains(req.URL.RawQuery, "miniweb=bypass") { + bypass = true + req.URL.RawQuery = strings.ReplaceAll(req.URL.RawQuery, bypassmagic, "") + } } var nofilter = map[string]bool{ @@ -8,29 +21,30 @@ var nofilter = map[string]bool{ } func FilterHTML(w io.Writer, req *http.Request, root *html.Node) bool { - if req.URL.Path == "/" || nofilter[req.URL.Host] { + if bypass || req.URL.Path == "/" || nofilter[req.URL.Host] { return false } if req.URL.Host == "www.washingtonpost.com" { findwapoblob(root) } + badstuff := cascadia.MustCompile("div#comments") + if bad := badstuff.MatchFirst(root); bad != nil { + bad.Parent.RemoveChild(bad) + } + w.Write(prolog) sel := cascadia.MustCompile("article") articles := sel.MatchAll(root) if len(articles) > 0 { - w.Write(prolog) for _, a := range articles { - clean(w, a, req.URL) + cleanit(w, a, req.URL) } return true } - clean(w, root, req.URL) + cleanit(w, root, req.URL) return true } -var startingblob = 0 -var blobarray []interface{} - func findwapoblob(root *html.Node) { script := cascadia.MustCompile("script#__NEXT_DATA__").MatchFirst(root) if script == nil { @@ -212,6 +226,10 @@ func procscriptnode(w io.Writer, script *html.Node, baseurl *url.URL) { } } +func cleanit(w io.Writer, node *html.Node, baseurl *url.URL) { + clean(w, node, baseurl) +} + func clean(w io.Writer, node *html.Node, baseurl *url.URL) { switch node.Type { case html.ElementNode: @@ -292,7 +310,7 @@ func clean(w io.Writer, node *html.Node, baseurl *url.URL) { src = fmt.Sprintf("https://www.washingtonpost.com/wp-apps/imrs.php?src=%s&w=916", src) alt := m["credits_caption_display"].(string) fmt.Fprintf(w, `<img src="%s"><p>%s`, html.EscapeString(src), html.EscapeString(alt)) - startingblob = i+1 + startingblob = i + 1 return } } @@ -318,4 +336,3 @@ func clean(w io.Writer, node *html.Node, baseurl *url.URL) { } } } - |