PixelbScripts/scripts/bashfeed at master · QuantumAI-CAN/PixelbScripts

executable file
146 lines (126 loc) · 5.36 KB
# Generate an RSS 2.0 feed directly from your existing web site
# Author    - Pádraig Brady <P@draigBrady.com>
# Licence   - LGPLV2
# Releases  -
#   1.0     - Jun 19 2006 - Initial release
#   1.1     - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
#                           HTML comment within the first 10 lines.
#   1.2     - May 01 2007 - Add author elements (from html if present)
#   1.4     - May 04 2011
#    http://github.com/pixelb/scripts/commits/master/scripts/bashfeed
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.
# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.
# Which files are selected and excluded can be configured below.
# Note for html files it will extract the following elements if present
#   <head>
#     <title>Item title</title>
#     <meta name="description" content="One line item description">
#     <meta name="keywords" content="Item tags">
#     <meta name="author" content="Item author">
#   </head>
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# Maximum number of items to put in the feed.
num_files=10
# Host name used to build every absolute URL in the feed.
site="www.pixelbeat.org"
# Feed-wide author (emitted as managingEditor); a page whose author meta tag
# equals this value gets no per-item <author> element.
author="P@draigBrady.com (Pádraig Brady)"
# Emitted as the channel <ttl>.
suggested_update_freq=1440 #mins
# TITLE and DESCRIPTION keep any value already set in the environment and
# otherwise default to values derived from $site. The expansions are quoted
# so the resulting text is not word-split or glob-expanded as arguments to ':'.
: "${TITLE=$site}"
: "${DESCRIPTION=latest from $site}"
#files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' #only show these files
exclude_re='(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)' #don't show these paths
# Index file names that are stripped from the end of item paths, so that
# links point at the containing directory.
default_files="index.html index.shtml index.php"
############# No user serviceable parts below ###################
# Build a sed program with one substitution per default file name; each one
# removes that name from the end of a path and, via `t`, ends the sed script
# for that line as soon as a substitution has been made.
# The result is applied to each item path further down (sed "$replace_default_files").
# NOTE(review): the loop's closing `done` is not visible in this view.
for file in $default_files; do
    replace_default_files="$replace_default_files; s/\(.*\)$file$/\1/;t"
# Emit the XML declaration, the stylesheet processing instruction and the
# opening <rss> tag, then the channel-level elements filled in from TITLE,
# DESCRIPTION, site, author, suggested_update_freq, the current time and the
# two-letter language code taken from $LANG.
# NOTE(review): the closing quote of this echo and the construct that expands
# the variables in the channel lines (presumably a here-document) are not
# visible in this view — confirm against the complete script.
echo '<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/css" href="rss2.css" ?>
<rss version="2.0">
time=`date --rfc-2822`
<title>$TITLE</title>
<ttl>$suggested_update_freq</ttl>
<link>http://$site/$1</link>
<generator>http://www.pixelbeat.org/scripts/bashfeed</generator>
<description>$DESCRIPTION</description>
<managingEditor>$author</managingEditor>
<lastBuildDate>$time</lastBuildDate>
<language>`echo $LANG | sed 's/\(..\)_.*/\1/'`</language>
# Report whether the text on stdin contains a raw '&', '<' or '>' that is
# not part of a well-formed XML escape, i.e. whether embedding the text in
# the feed as is would make the XML invalid.
# Input:   text on stdin
# Returns: 0 if an unescaped character remains, non-zero otherwise
xml_unescaped() {
    # Remove everything XML accepts as an escape: the five predefined
    # entities plus decimal and hexadecimal character references.
    # Whatever still matches [&<>] afterwards is unescaped.
    sed -e 's/&amp;//g' -e 's/&[lg]t;//g' -e 's/&quot;//g' -e 's/&apos;//g' \
        -e 's/&#[0-9][0-9]*;//g' -e 's/&#x[0-9A-Fa-f][0-9A-Fa-f]*;//g' |
    grep -q "[&<>]"
}
# List candidate files newest first and print the paths to publish.
# find emits "path<TAB>mtime" (%T@ is the modification time in seconds since
# the epoch) so that sort can order numerically, descending, on column 2;
# the two greps then apply the include and exclude patterns.
# NOTE(review): no visible stage removes the timestamp column before the
# `read file` below — confirm a `cut -f1` (or similar) is not missing here.
# NOTE(review): the closing `fi`/`done` of the loop are not visible in this view.
find $1 -type f -printf "%p\t%T@\n" |
sed 's/^\.\///' | # strip leading ./ when "$1" is empty
sort -k2,2nr |
grep -E  "$include_re" |
grep -Ev "$exclude_re" |
while read file; do
    # Skip files carrying the opt-out marker within the lines head prints by
    # default (the first 10); the marker is matched as a fixed string,
    # case-insensitively.
    if ! head "$file" | grep -Fiq '<!--Exclude from bashfeed-->'; then
        echo "$file"
        # i is never initialised; arithmetic expansion treats unset as 0.
        i=$((i+1))
        # Stop once num_files paths have been printed.
        [ $i -eq $num_files ] && break
# For every path read from stdin, gather the item's metadata and emit its
# feed entry (title, guid, pubDate, link, optional author/categories and a
# CDATA description).
# NOTE(review): several closing `fi`/`done` lines and the construct wrapping
# the item template at the end of the loop are not visible in this view.
while read file; do
    # Both values come from the file's modification time: an RFC 2822 date
    # for <pubDate>, and epoch seconds appended to the link and embedded in
    # the description so that an updated file yields changed fields.
    pubDate=`date --reference="$file" --rfc-2822`
    force_update=`date --reference="$file" "+%s"`
    # NOTE(review): page_author is not cleared here with the other fields, so
    # for a non-HTML file it appears to keep the previous iteration's value
    # (possibly already wrapped in <author>) — confirm.
    title=""; keywords=""; description=""
    if echo "$file" | grep -Eq '\.(html|shtml|php)$'; then
        # Each sed prints the first case-insensitive match and quits: `T`
        # skips the `q` on lines where no substitution happened.
        title=`sed -n 's/.*<title>\(.*\)<\/title>.*/\1/ip;T;q' < "$file"`
        keywords=`sed -n 's/.*<META.*NAME="keywords".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file"`
        description=`sed -n 's/.*<META.*NAME="description".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file"`
        page_author=`sed -n 's/.*<META.*NAME="author".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file"`
        # NOTE(review): the `$` after $description is a literal dollar sign in
        # the checked text; it does not affect the [&<>] test — likely a typo.
        extracted_text="$title $keywords $description$ $page_author"
        # Abort rather than emit metadata that still contains raw & < or >.
        if echo "$extracted_text" | xml_unescaped; then
          echo "Error: HTML metadata in $file will not produce a valid XML feed" >&2
          exit 1
    elif [ -x "$file" ]; then # I always have a 1 line description on line 3 of my scripts
        description=`sed -n '3s/# \(.*\)/\1/p' "$file"`
    # Drop a trailing default index file name so the link targets the directory.
    file=`echo "$file" | sed "$replace_default_files"`
    # Fall back to the (possibly shortened) path when no title was extracted.
    [ -z "$title" ] && title="$file"
    tags=""
    if [ ! -z "$keywords" ]; then
        # Keywords are split on whitespace; each becomes one <category>
        # element followed by a newline and indentation.
        for keyword in $keywords; do
            tags=`echo -ne "$tags<category>$keyword</category>\n    "`
        done
    if [ "$page_author" ]; then
        # An author identical to the feed-wide one is omitted from the item.
        if [ "$page_author" = "$author" ]; then
            page_author=""
        else
            page_author=`echo -ne "<author>$page_author</author>\n    "`
    <title>$title</title>
    <guid>http://$site/$file</guid>
    <pubDate>$pubDate</pubDate>
    <link>http://$site/$file#$force_update</link>
    $page_author$tags<description><![CDATA[$description<!--$force_update-->]]></description>
echo '</channel>
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

bashfeed

Latest commit

History

bashfeed

File metadata and controls