#!/bin/sh -e
#
# https://rgz.ee/bin/ssg6
# Copyright 2018-2019 Roman Zolotarev <hi@romanzolotarev.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
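# usage: ssg6 src dst title base_url
#
# Copy the site from src to dst: Markdown and HTML pages are wrapped in
# _header.html and _footer.html, other files are copied verbatim, a
# sitemap.xml is generated, and the posts/ article list is rebuilt
# whenever index.md changes.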
main() {
test -n "$1" || usage
test -n "$2" || usage
test -n "$3" || usage
test -n "$4" || usage
test -d "$1" || no_dir "$1"
test -d "$2" || no_dir "$2"
src=$(readlink_f "$1")
dst=$(readlink_f "$2")
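# build find(1) exclusion patterns from .ssgignore (default: skip dotfiles)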
IGNORE=$(
if ! test -f "$src/.ssgignore"; then
printf ' ! -path "*/.*"'
return
fi
while read -r x; do
test -n "$x" || continue
printf ' ! -path "*/%s*"' "$x"
done <"$src/.ssgignore"
)
# files
title="$3"
h_file="$src/_header.html"
f_file="$src/_footer.html"
test -f "$f_file" && FOOTER=$(cat "$f_file") && export FOOTER
test -f "$h_file" && HEADER=$(cat "$h_file") && export HEADER
list_dirs "$src" |
(cd "$src" && cpio -pdu "$dst")
fs=$(
if test -f "$dst/.files"; then
list_affected_files "$src" "$dst/.files" | uniq
else
list_files "$1"
fi
)
if test -n "$fs"; then
echo "$fs" | tee "$dst/.files"
if echo "$fs" | grep -q '\.md$'; then
if test -x "$(which lowdown 2>/dev/null)"; then
echo "$fs" | grep '\.md$' |
render_md_files_lowdown "$src" "$dst" "$title"
else
if test -x "$(which Markdown.pl 2>/dev/null)"; then
echo "$fs" | grep '\.md$' |
render_md_files_Markdown_pl "$src" "$dst" "$title"
else
echo "couldn't find lowdown nor Markdown.pl"
exit 3
fi
fi
fi
echo "$fs" | grep '\.html$' |
render_html_files "$src" "$dst" "$title"
echo "$fs" | grep -Ev '\.md$|\.html$' |
(cd "$src" && cpio -pu "$dst")
fi
printf '[ssg] ' >&2
print_status 'file, ' 'files, ' "$fs" >&2
# sitemap
base_url="$4"
date=$(date +%Y-%m-%d)
urls=$(list_pages "$src")
test -n "$urls" &&
render_sitemap "$urls" "$base_url" "$date" >"$dst/sitemap.xml"
if echo "$fs" | grep "index.md$"; then
render_article_list "$urls" "$base_url" "$dst" "$src"
fi
print_status 'url' 'urls' "$urls" >&2
echo >&2
}
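# readlink_f file
# Portable equivalent of readlink -f: resolve symlinks and print the
# absolute path.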
readlink_f() {
file="$1"
cd "$(dirname "$file")"
file=$(basename "$file")
while test -L "$file"; do
file=$(readlink "$file")
cd "$(dirname "$file")"
file=$(basename "$file")
done
dir=$(pwd -P)
echo "$dir/$file"
}
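# print_status singular plural list
# Print the line count of list with the right label, or "no <plural>"
# when the list is empty.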
print_status() {
test -z "$3" && printf 'no %s' "$2" && return
echo "$3" | awk -v singular="$1" -v plural="$2" '
END {
if (NR==1) printf NR " " singular
if (NR>1) printf NR " " plural
}'
}
usage() {
echo "usage: ${0##*/} src dst title base_url" >&2
exit 1
}
no_dir() {
echo "${0##*/}: $1: No such directory" >&2
exit 2
}
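# file listing helpers: each one cds into the given directory and applies
# the .ssgignore exclusions collected in $IGNORE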
list_dirs() {
cd "$1" && eval "find . -type d ! -name '.' ! -path '*/_*' $IGNORE"
}
list_files() {
cd "$1" && eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE"
}
list_dependant_files() {
e="\\( -name '*.html' -o -name '*.md' -o -name '*.css' -o -name '*.js' \\)"
cd "$1" && eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE $e"
}
list_newer_files() {
cd "$1" && eval "find . -type f ! -name '.' $IGNORE -newer $2"
}
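# has_partials
# Succeed when stdin lists a changed partial (an _-prefixed .html, .js or
# .css file), which forces dependent pages to be re-rendered.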
has_partials() {
grep -qE '^./_.*\.html$|^./_.*\.js$|^./_.*\.css$'
}
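# list_affected_files src stamp
# Print the files that need re-rendering: everything newer than the stamp
# file, all dependent files if a partial changed, and index.md whenever a
# post changed so the article list is rebuilt.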
list_affected_files() {
fs=$(list_newer_files "$1" "$2")
if echo "$fs" | has_partials; then
list_dependant_files "$1"
else
echo "$fs"
fi
if echo "$fs" | grep "posts/"; then
if ! echo "$fs" | grep "index\.md"; then
echo index.md
fi
fi
}
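# root_prefix
# Emit one "../" per directory level of the file currently being rendered
# ($f); used to rewrite /WEBROOT/ in the header into a relative path.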
root_prefix() {
stripped_path=$(echo "$f" | sed 's|\./||g')
sub_count=$(echo "$stripped_path" | grep -o / | wc -l)
yes "../" | head -n $((sub_count)) | tr -d '\n'
}
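# render_*_files src dst title
# Read relative paths from stdin and render each file into dst; Markdown
# is converted with lowdown or Markdown.pl before the header and footer
# are applied.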
render_html_files() {
while read -r f; do
render_html_file "$3" <"$1/$f" >"$2/$f"
done
}
render_md_files_lowdown() {
while read -r f; do
lowdown \
--html-no-escapehtml \
--html-no-skiphtml \
--parse-no-metadata \
--parse-no-autolink <"$1/$f" |
render_html_file "$3" \
>"$2/${f%\.md}.html"
done
}
render_md_files_Markdown_pl() {
while read -r f; do
Markdown.pl <"$1/$f" |
render_html_file "$3" \
>"$2/${f%\.md}.html"
done
}
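# render_html_file title
# Wrap stdin in HEADER and FOOTER. Pages that already contain an <html>
# tag are passed through untouched; otherwise the first <h1> is prepended
# to the page title and /WEBROOT/ in the header is replaced with the
# relative root prefix.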
render_html_file() {
# h/t Devin Teske
awk -v title="$1" -v pfx="$(root_prefix)" '
{ body = body "\n" $0 }
END {
body = substr(body, 2)
if (body ~ /<\/?[Hh][Tt][Mm][Ll]/) {
print body
exit
}
if (match(body, /<[[:space:]]*[Hh]1(>|[[:space:]][^>]*>)/)) {
t = substr(body, RSTART + RLENGTH)
sub("<[[:space:]]*/[[:space:]]*[Hh]1.*", "", t)
gsub(/^[[:space:]]*|[[:space:]]$/, "", t)
if (t) title = t " &mdash; " title
}
n = split(ENVIRON["HEADER"], header, /\n/)
for (i = 1; i <= n; i++) {
if (match(tolower(header[i]), "<title></title>")) {
head = substr(header[i], 1, RSTART - 1)
tail = substr(header[i], RSTART + RLENGTH)
print head "<title>" title "</title>" tail
} else if (match(header[i], "/WEBROOT/")){
head = substr(header[i], 1, RSTART - 1)
tail = substr(header[i], RSTART + RLENGTH)
print head pfx tail
} else print header[i]
}
print body
print ENVIRON["FOOTER"]
}'
}
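# list_pages src
# List all pages as site-relative URLs: .md becomes .html and directory
# indexes collapse to a trailing slash.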
list_pages() {
e="\\( -name '*.html' -o -name '*.md' \\)"
cd "$1" && eval "find . -type f ! -path '*/.*' ! -path '*/_*' $IGNORE $e" |
sed 's#^./##;s#.md$#.html#;s#/index.html$#/#'
}
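# render_article_list urls base_url dst src
# Build a date-sorted list of posts/ pages from their embedded
# "<!--- date: ... --->" and "<!--- title: ... --->" comments and splice
# it into the </article> tag of dst/index.html.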
render_article_list() {
urls="$1"
base_url="$2"
dst="$3"
src="$4"
sorted_urls=""
for i in $urls; do
if echo "$i" | grep -q '^posts/'; then
DATE=$(grep '<!--- date: .*-.*-.* --->' "$dst/$i" | cut -d ' ' -f 3)
url="$DATE;$i"
sorted_urls=$(printf '%s\n%s' "$sorted_urls" "$url" | sort -r)
fi
done
items=""
for i in $sorted_urls; do
url=$(echo "$i" | cut -d';' -f2-)
CLEAN_DATE=$(grep '<!--- date: .*-.*-.* --->' "$dst/$url" | cut -d ' ' -f 3)
TITLE=$(grep '<!--- title: .*--->' "$dst/$url" | awk '{for(i=3;i<NF;i++) printf $i" "}' | xargs)
DATE=$(date -d "$CLEAN_DATE" +"%B %d, %Y") # GNU date(1); -d is not portable
item="<li><a href='$url'>$TITLE</a><i>$DATE</i></li>"
items="$items $item"
done
sed -i "s|</article>|<ul class='articles'>$items</ul></article>|g" "$dst/index.html"
}
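# render_sitemap urls base_url date
# Print a sitemap.xml document with one <url> entry per page, stamped
# with the given date and priority 1.0.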
render_sitemap() {
urls="$1"
base_url="$2"
date="$3"
echo '<?xml version="1.0" encoding="UTF-8"?>'
echo '<urlset'
echo 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
echo 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
echo 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'
echo 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
echo "$urls" |
sed -E 's#^(.*)$#<url><loc>'"$base_url"'/\1</loc><lastmod>'"$date"'</lastmod><priority>1.0</priority></url>#'
echo '</urlset>'
}
main "$@"