shithub: rrss

Download patch

ref: feaf548ad5b9df3e0cee4de5fdc2c1e97c6e0a8a
author: sl <sl@gaff>
date: Sat Aug 24 23:37:23 EDT 2024

mercurial -> git: initial import (sans commit history)

--- /dev/null
+++ b/index.md
@@ -1,0 +1,43 @@
+     RRSS(1)                                                       RRSS(1)
+
+     NAME
+          rrss, trrss - RSS feed readers
+
+     SYNOPSIS
+          rrss [-f barf|blagh] [-r root] [-t tag] [-u url]
+
+          trrss [-f barf|blagh] [-r root] [-t tag] [-u url]
+
+     DESCRIPTION
+          Rrss pulls and parses an RSS feed.
+
+          There are a number of options:
+
+          -f   Place output in formatted directories for one
+               of two werc apps: barf or blagh. In the absence
+               of the -f flag, formatted output is placed on
+               stdout.
+
+               A file, links, is created in the root and is populated
+               with the URL of each feed item acquired. On sub-
+               sequent runs, URLs that appear in the links file are
+               not duplicated as new directories.
+
+          -r   Optionally, create barf or blagh directories
+               under root. Default is the current directory.
+
+          -t   Create tag for each post (barf only).
+
+          -u   The feed URL.
+
+          Trrss is a shell script that wraps the rrss program,
+          outputting plain text but preserving link URLs.
+
+     SOURCE
+          https://shithub.us/sl/rrss/HEAD/info.html
+     FORKS
+          https://only9fans.com/phil9/rrss/HEAD/info.html
+     SEE ALSO
+          http://werc.cat-v.org
+          http://werc.cat-v.org/apps/blagh
+          https://code.9front.org/hg/barf
--- /dev/null
+++ b/main.go
@@ -1,0 +1,225 @@
+// RSS feed reader that outputs plain text, werc/apps/barf, or werc/apps/blagh format.
+package main
+
+import (
+	"bufio"
+	"crypto/tls"
+	"flag"
+	"fmt"
+	"github.com/SlyMarbo/rss"
+	"html"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
// Command-line flags; see usage() and the man page in index.md.
var (
	debug  = flag.Bool("d", false, "print debug msgs to stderr")
	format = flag.String("f", "", "output format")
	root   = flag.String("r", "", "output root")
	tag    = flag.String("t", "", "feed tag (barf only)")
	url    = flag.String("u", "", "feed url")
)
+
// usage prints the command synopsis and flag defaults to standard
// error, then exits with status 2.
func usage() {
	fmt.Fprint(os.Stderr, "usage: rrss [-f barf|blagh] [-r root] [-t tag] [-u url]\n")
	flag.PrintDefaults()
	os.Exit(2)
}
+
// check terminates the program via log.Fatal when err is non-nil;
// a nil err is a no-op.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
+
// fetchfeed performs an HTTP GET of url with a browser-like
// User-Agent, accepting self-signed TLS certificates (common on small
// feed hosts).  It is used as the fetch function for rss.FetchByFunc.
func fetchfeed(url string) (resp *http.Response, err error) {
	defaultTransport := http.DefaultTransport.(*http.Transport)

	// Clone the default transport's settings, but skip TLS
	// certificate verification so self-signed feeds still load.
	customTransport := &http.Transport{
		Proxy:                 defaultTransport.Proxy,
		DialContext:           defaultTransport.DialContext,
		MaxIdleConns:          defaultTransport.MaxIdleConns,
		IdleConnTimeout:       defaultTransport.IdleConnTimeout,
		ExpectContinueTimeout: defaultTransport.ExpectContinueTimeout,
		TLSHandshakeTimeout:   defaultTransport.TLSHandshakeTimeout,
		TLSClientConfig:       &tls.Config{InsecureSkipVerify: true},
	}
	// Bound the whole request: the client previously had no timeout
	// and could hang forever on a stalled server.
	client := &http.Client{
		Transport: customTransport,
		Timeout:   time.Minute,
	}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	// Some servers refuse requests without a browser-like User-Agent.
	req.Header.Add("User-Agent", "Mozilla/5.0 (compatible; hjdicks)")
	return client.Do(req)
}
+
+func isold(date time.Time, link string, path string) bool {
+	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0775)
+	if err != nil {
+		return true
+	}
+	defer file.Close()
+	s := fmt.Sprintf("%d_%s", date.Unix(), link)
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		if strings.Contains(s, scanner.Text()) {
+			return true
+		}
+	}
+	return false
+}
+
+func makeold(date time.Time, link string, path string) (int, error) {
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0775)
+	defer f.Close()
+	check(err)
+	if link == "" {
+		link = "empty"
+	}
+	s := fmt.Sprintf("%d_%s", date.Unix(), link)
+	return f.WriteString(s + "\n")
+}
+
+// https://code.9front.org/hg/barf
+func barf(url string) {
+	feed, err := rss.FetchByFunc(fetchfeed, url)
+	if *debug {
+		log.Printf("Tried fetching feed '%s' => err: %v\n", url, err)
+	}
+	check(err)
+	for _, i := range feed.Items {
+		d := "src"
+		links := "links"
+		if *root != "" {
+			d = *root + "/" + d
+			links = *root + "/" + links
+		}
+		if isold(i.Date, i.Link, links) {
+			continue
+		}
+		err = os.MkdirAll(d, 0775)
+		check(err)
+		f, err := os.Open(d)
+		defer f.Close()
+		check(err)
+		dn, err := f.Readdirnames(0)
+		check(err)
+		var di []int
+		for _, j := range dn {
+			k, _ := strconv.Atoi(j)
+			di = append(di, k)
+		}
+		sort.Ints(di)
+		n := 1
+		if di != nil {
+			n = di[len(di)-1] + 1
+		}
+		d = fmt.Sprintf("%s/%d", d, n)
+		if *debug == true {
+			fmt.Printf("%s len(di): %d n: %d d: %s\n",
+				i.Link, len(di), n, d)
+		}
+		err = os.MkdirAll(d, 0775)
+		check(err)
+		err = ioutil.WriteFile(d+"/title", []byte(i.Title+"\n"), 0775)
+		check(err)
+		err = ioutil.WriteFile(d+"/link", []byte(i.Link+"\n"), 0775)
+		check(err)
+		err = ioutil.WriteFile(d+"/date", []byte(i.Date.String()+"\n"), 0775)
+		check(err)
+		err = ioutil.WriteFile(d+"/body", []byte(conorsum(i)+"\n"), 0775)
+		check(err)
+		if *tag != "" {
+			err = os.MkdirAll(d+"/tags", 0775)
+			check(err)
+			for _, j := range strings.Split(*tag, " ") {
+				f, err := os.Create(d + "/tags/" + j)
+				f.Close()
+				check(err)
+			}
+		}
+		_, err = makeold(i.Date, i.Link, links)
+		check(err)
+	}
+}
+
+// http://werc.cat-v.org/apps/blagh
+func blagh(url string) {
+	feed, err := rss.FetchByFunc(fetchfeed, url)
+	check(err)
+	for _, i := range feed.Items {
+		d := fmt.Sprintf("%d/%02d/%02d", i.Date.Year(), i.Date.Month(), i.Date.Day())
+		links := "links"
+		if *root != "" {
+			d = *root + "/" + d
+			links = *root + "/" + links
+		}
+		if isold(i.Date, i.Link, links) {
+			continue
+		}
+		f, _ := os.Open(d) // directory will usually not exist yet
+		defer f.Close()
+		n, _ := f.Readdirnames(0)
+		d = fmt.Sprintf("%s/%d", d, len(n))
+		err = os.MkdirAll(d, 0775)
+		check(err)
+		err = ioutil.WriteFile(
+			d+"/index.md",
+			[]byte(i.Title+"\n===\n\n"+conorsum(i)+"\n"),
+			0775,
+		)
+		check(err)
+		_, err = makeold(i.Date, i.Link, links)
+		check(err)
+	}
+}
+
+func stdout(url string) {
+	feed, err := rss.FetchByFunc(fetchfeed, url)
+	if *debug {
+		log.Printf("Tried fetching feed '%s' => err: %v\n", url, err)
+	}
+	check(err)
+	for _, i := range feed.Items {
+		fmt.Printf("title: %s\nlink: %s\ndate: %s\n%s\n\n",
+			i.Title, i.Link, i.Date, conorsum(i))
+	}
+}
+
+func conorsum(i *rss.Item) string {
+	var s string
+	switch {
+	case len(i.Content) > 0:
+		s = i.Content
+	case len(i.Summary) > 0:
+		s = i.Summary
+	default:
+		return ""
+	}
+	return html.UnescapeString(s)
+}
+
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+	if *url == "" {
+		usage()
+	}
+	switch *format {
+	case "barf":
+		barf(*url)
+	case "blagh":
+		blagh(*url)
+	case "":
+		stdout(*url)
+	default:
+		usage()
+	}
+}
--- /dev/null
+++ b/trrss
@@ -1,0 +1,9 @@
#!/bin/rc
# Run rrss and convert HTML to plain text, retaining link URLs.
# NOTE: Requires plan9port or 9base. Fix the shebang path to rc.
#
# The sed script wraps the metadata lines rrss prints (title:, link:,
# date:) in HTML tags so they survive HTML rendering as separate
# lines/paragraphs.  The pipeline then hands the stream to the plan9
# tools tcs, htmlfmt, and uhtml; presumably tcs re-encodes to HTML,
# htmlfmt renders it to text (-a keeping anchor URLs, per the comment
# above), and uhtml normalizes the encoding — TODO confirm against
# the plan9port manuals.
rrss $* | sed '
	s/^title:.*$/<p>&/g
	s/^link:.*$/<br>&/g
	s/^date:.*$/<br>&<br>/g
	' | tcs -t html | htmlfmt -a -c utf-8 | uhtml
+