blog

Categories     Timeline     RSS

Watch website changes with RSS

I use the Go program below to get notifications in my RSS reader when websites that don’t offer RSS feeds themselves change. For each website you create a new command in main(), choose a shortname, enter the URL and enter an HTML node selector for the part you are interested in (thus also excluding surrounding content that might be dynamically generated on each visit). You then call this program with “go run webwatcher SHORTNAME” from your RSS reader.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	. "git.gutmet.org/goutil.git/html"
	"git.gutmet.org/goutil.git/misc"
	xnetHtml "golang.org/x/net/html"
	"io/ioutil"
	"os"
	"path"
	"text/template"
	"time"
)

const (
	// DL_LIMIT is the limit handed to misc.DownloadAll for every fetch
	// (presumably a maximum size in bytes, i.e. 15 MiB; confirm against goutil).
	DL_LIMIT     = 15 * 1024 * 1024
	// CACHE_FOLDER is the directory, relative to the working directory, that
	// holds one JSON state file per command shortname.
	CACHE_FOLDER = "cache"
)

// RSS_TEMPLATE is the text/template source for the generated feed. It is
// executed with an HP value and reads its Shortname, URL, LastContent and
// LastModified fields. The feed always contains exactly one item; its guid is
// built from the URL plus the LastModified timestamp.
// NOTE(review): values are inserted verbatim (text/template does no escaping),
// so content containing "]]>" would end a CDATA section early.
const RSS_TEMPLATE string = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title><![CDATA[ {{.Shortname}} ]]></title>
<link><![CDATA[ {{.URL}} ]]></link>
<description><![CDATA[ {{.Shortname}} ]]></description>

    <item>
      <title><![CDATA[ {{.URL}} ]]></title>
      <content:encoded><![CDATA[ {{.LastContent}} ]]></content:encoded>
      <guid>{{.URL}}/{{.LastModified.Format "20060102-150405"}}</guid>
      <link>{{.URL}}</link>
      <pubDate>{{.LastModified.Format "Mon, 02 Jan 2006 15:04:05 -0700"}}</pubDate>
    </item>

</channel>
</rss>
`

// optPanic aborts the program by panicking with err when err is non-nil;
// a nil err is a no-op.
func optPanic(err error) {
	if err == nil {
		return
	}
	panic(err)
}

// command describes one watched website.
type command struct {
	// shortname is the identifier matched against the first command-line
	// argument; it is also the name of the cache file for this site.
	shortname string
	// URL is the page that gets downloaded.
	URL       string
	// selector is the predicate passed to HtmlNode.Find to pick the node
	// whose rendered markup is tracked for changes.
	selector  func(n *HtmlNode) bool
}

// filename returns the location of this command's cache file:
// the shortname joined onto CACHE_FOLDER.
func (c *command) filename() string {
	name := c.shortname
	return path.Join(CACHE_FOLDER, name)
}

// getContent downloads c.URL (limited by DL_LIMIT), parses the response as
// HTML, and returns the rendered markup of the node that Find yields for
// c.selector.
//
// It panics if the download, the parse or the rendering fails, or if no
// node is found.
func (c *command) getContent() string {
	b, err := misc.DownloadAll(c.URL, DL_LIMIT)
	optPanic(err)
	tmpdoc, err := xnetHtml.Parse(bytes.NewReader(b))
	optPanic(err)
	doc := (*HtmlNode)(tmpdoc)
	n := doc.Find(c.selector)
	// Assumes Find reports "no match" as nil (TODO confirm against goutil).
	// Fail with a readable message instead of a nil dereference inside Render.
	if n == nil {
		panic(fmt.Errorf("%s: no node in %s matches the selector", c.shortname, c.URL))
	}
	var buf bytes.Buffer
	// Render can fail; a partially rendered node must not be cached or published.
	optPanic(xnetHtml.Render(&buf, (*xnetHtml.Node)(n)))
	return buf.String()
}

// unmarshalHPObject loads the cached HP state from filename.
// Any failure (file unreadable, invalid JSON) yields the zero HP value,
// so a first run or a damaged cache simply starts from scratch.
func unmarshalHPObject(filename string) HP {
	raw, err := ioutil.ReadFile(filename)
	if err != nil {
		return HP{}
	}
	var stored HP
	if json.Unmarshal(raw, &stored) != nil {
		return HP{}
	}
	return stored
}

// marshalHPObject writes hp to filename as indented JSON (two-space indent,
// file mode 0644). It panics if encoding or writing fails.
func marshalHPObject(filename string, hp HP) {
	encoded, marshalErr := json.MarshalIndent(hp, "", "  ")
	optPanic(marshalErr)
	optPanic(ioutil.WriteFile(filename, encoded, 0644))
}

// genRSS fetches the current content for this command, compares it with the
// cached state, and prints the resulting single-item feed to stdout.
// LastContent and LastModified are only updated when the content differs
// from the cached one. The state is written back to the cache file after the
// feed has been emitted. Panics if the template fails to execute.
func (c *command) genRSS() {
	fresh := c.getContent()
	cachePath := c.filename()

	state := unmarshalHPObject(cachePath)
	state.Shortname, state.URL = c.shortname, c.URL
	if state.LastContent != fresh {
		state.LastContent = fresh
		state.LastModified = time.Now()
	}

	if err := rssTemplate.Execute(os.Stdout, state); err != nil {
		panic(err)
	}
	marshalHPObject(cachePath, state)
}

// HP is the per-site state that is both persisted as JSON in the cache file
// and handed to the RSS template. The fields carry no JSON tags, so they are
// stored under their Go field names.
type HP struct {
	// Shortname and URL are overwritten from the command on every run.
	Shortname    string
	URL          string
	// LastContent is the most recently seen rendered markup of the selected node.
	LastContent  string
	// LastModified is the time at which a change of LastContent was last detected.
	LastModified time.Time
}

// rssTemplate is the parsed RSS_TEMPLATE; it is assigned in main before any
// command runs and executed by genRSS.
var rssTemplate *template.Template

// main emits the RSS feed for the site whose shortname is given as the first
// command-line argument.
//
// Exit status is 0 after a feed has been generated and non-zero when the
// argument is missing or does not match any configured command.
func main() {
	// Without this guard a missing argument crashes with an index-out-of-range
	// panic on os.Args[1].
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: webwatcher SHORTNAME")
		os.Exit(-1)
	}
	name := os.Args[1]

	rssTemplate = template.Must(template.New("rss").Parse(RSS_TEMPLATE))
	// MkdirAll succeeds when the directory already exists, so any error here
	// is a real problem (e.g. permissions) worth reporting before fetching.
	optPanic(os.MkdirAll(CACHE_FOLDER, 0755))

	// Add one entry per watched website: shortname, URL, node selector.
	commands := []command{
		command{"stilldrinking", "https://www.stilldrinking.org/", IsTag("div").And(HasID("cont"))},
	}

	for i := range commands {
		if commands[i].shortname == name {
			commands[i].genRSS()
			os.Exit(0)
		}
	}
	fmt.Fprintln(os.Stderr, "unknown command", name)
	os.Exit(-1)
}

Inline style block with Content Security Policy

To my surprise, it is possible to serve inline style blocks with Content Security Policy enabled:

Snippet latexpic

I uploaded a new snippet called ‘latexpic’ to my git server. It can be used to quickly generate PNGs from partial LaTeX source. It requires LaTeX, ImageMagick and Bash.

Example:

> latexpic "white on black"  '$ f(x) = e^x $ \\ $ f(\bar x) = \bar x^2 $'

… generated this:

latexpic result

<--Previous