forked from pixelb/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbashfeed
More file actions
executable file
·146 lines (126 loc) · 5.36 KB
/
bashfeed
File metadata and controls
executable file
·146 lines (126 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/bin/sh
# Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady <P@draigBrady.com>
# Licence - LGPLV2
# Releases -
# 1.0 - Jun 19 2006 - Initial release
# 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
# HTML comment within the first 10 lines.
# 1.2 - May 01 2007 - Add author elements (from html if present)
# 1.4 - May 04 2011
# http://github.com/pixelb/scripts/commits/master/scripts/bashfeed
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.
# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.
# Which files are selected and excluded can be configured below.
# Note for html files it will extract the following elements if present
# <head>
# <title>Item title</title>
# <meta name="description" content="One line item description">
# <meta name="keywords" content="Item tags">
# </head>
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# ---- User configuration ----------------------------------------------------
num_files=10                       # maximum number of items in the feed
site="www.pixelbeat.org"
author="P@draigBrady.com (Pádraig Brady)"
suggested_update_freq=1440 #mins

# TITLE and DESCRIPTION may be preset in the environment; otherwise default.
: "${TITLE=$site}"
: "${DESCRIPTION=latest from $site}"

#files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' #only show these files
exclude_re='(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)' #don't show these paths
default_files="index.html index.shtml index.php"

############# No user serviceable parts below ###################

# Build a sed program that strips a trailing default document name from a
# path, so "dir/index.html" is published as "dir/".
# Each name is matched literally (regex metacharacters escaped) and only as a
# whole path component, so "dir/myindex.html" is left untouched.
replace_default_files=""
for file in $default_files; do
  file_re=$(printf '%s\n' "$file" | sed 's|[][\.*^$/]|\\&|g')
  replace_default_files="$replace_default_files; s/\(^\|.*\/\)$file_re\$/\1/;t"
done
# Print the language part of locale name $1 (e.g. "en_IE.UTF-8" -> "en").
# Prints nothing when the locale names no language (empty, "C", "POSIX",
# "C.UTF-8"), since those are not valid values for the RSS <language> element.
locale_language() {
  case ${1%%[_.@]*} in
    '' | C | POSIX) ;;
    *) printf '%s\n' "${1%%[_.@]*}" ;;
  esac
}

# XML prologue and opening of the channel.
echo '<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/css" href="rss2.css" ?>
<rss version="2.0">
<channel>'

time=$(date --rfc-2822)

# <language> is optional in RSS 2.0: emit it only when $LANG yields a language.
language=$(locale_language "$LANG")
language_element=""
if [ -n "$language" ]; then
  language_element="<language>$language</language>"
fi

# Channel-level metadata; $1 is the optional subdirectory the feed covers.
printf '%s\n' "
<title>$TITLE</title>
<ttl>$suggested_update_freq</ttl>
<link>http://$site/$1</link>
<generator>http://www.pixelbeat.org/scripts/bashfeed</generator>
<description>$DESCRIPTION</description>
<managingEditor>$author</managingEditor>
<lastBuildDate>$time</lastBuildDate>
$language_element
"
# Succeed (status 0) when the text on stdin contains a raw '&', '<' or '>'
# that is not part of a predefined XML entity, i.e. when embedding the text
# verbatim would produce invalid XML. Fail (status 1) otherwise.
# The entities are removed first so that their own '&' is not reported.
xml_unescaped() {
  sed 's/&amp;//g; s/&[lg]t;//g; s/&quot;//g; s/&apos;//g' | grep -q '[&<>]'
}
# Copy paths from stdin to stdout, skipping any file whose first 10 lines
# contain the '<!--Exclude from bashfeed-->' marker, and stop once
# $num_files paths have been passed through.
newest_included() {
  i=0
  while IFS= read -r file; do
    if ! head < "$file" | grep -Fiq '<!--Exclude from bashfeed-->'; then
      printf '%s\n' "$file"
      i=$((i+1))
      if [ "$i" -eq "$num_files" ]; then
        break
      fi
    fi
  done
}

# Print the CONTENT value of the first <meta name="$1" ...> line in HTML
# file $2 (matched case-insensitively); prints nothing if there is none.
html_meta() {
  sed -n 's/.*<META.*NAME="'"$1"'".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$2"
}

# Read newline-separated paths on stdin and write one RSS <item> per path.
# Reads the globals site, author and replace_default_files.
# Exits with status 1 when a page's metadata contains unescaped XML characters.
feed_items() {
  while IFS= read -r file; do
    pubDate=$(date --reference="$file" --rfc-2822)
    # Seconds since the epoch: appended to the link and hidden in the
    # description so readers notice an updated file.
    force_update=$(date --reference="$file" "+%s")

    # Reset every per-item field. page_author must be cleared too, otherwise
    # a non-HTML file inherits (and re-wraps) the previous page's author.
    title=""; keywords=""; description=""; page_author=""

    case $file in
      *.html | *.shtml | *.php)
        title=$(sed -n 's/.*<title>\(.*\)<\/title>.*/\1/ip;T;q' < "$file")
        keywords=$(html_meta keywords "$file")
        description=$(html_meta description "$file")
        page_author=$(html_meta author "$file")
        extracted_text="$title $keywords $description $page_author"
        if printf '%s\n' "$extracted_text" | xml_unescaped; then
          echo "Error: HTML metadata in $file will not produce a valid XML feed" >&2
          exit 1
        fi
        ;;
      *)
        if [ -x "$file" ]; then # I always have a 1 line description on line 3 of my scripts
          description=$(sed -n '3s/# \(.*\)/\1/p' < "$file")
        fi
        ;;
    esac

    # Publish "dir/index.html" and friends as "dir/".
    file=$(printf '%s\n' "$file" | sed "$replace_default_files")
    if [ -z "$title" ]; then
      title="$file"
    fi

    # One <category> per whitespace-separated keyword. Globbing is disabled
    # so a keyword such as "*" is not expanded to file names.
    # printf is used rather than the non-portable `echo -ne`.
    tags=""
    if [ -n "$keywords" ]; then
      set -f
      for keyword in $keywords; do
        tags=$(printf '%s<category>%s</category>\n ' "$tags" "$keyword")
      done
      set +f
    fi

    # Only emit <author> when the page author differs from the feed author.
    if [ -n "$page_author" ]; then
      if [ "$page_author" = "$author" ]; then
        page_author=""
      else
        page_author=$(printf '<author>%s</author>\n ' "$page_author")
      fi
    fi

    printf '%s\n' "
<item>
<title>$title</title>
<guid>http://$site/$file</guid>
<pubDate>$pubDate</pubDate>
<link>http://$site/$file#$force_update</link>
$page_author$tags<description><![CDATA[$description<!--$force_update-->]]></description>
</item>
"
  done
}

tab=$(printf '\t')

# List files (newest first), apply the include/exclude filters, keep the
# newest $num_files and render them; close the feed only if rendering succeeded.
find "${1:-.}" -type f -printf "%p\t%T@\n" |
  sed 's/^\.\///' | # strip leading ./ when "$1" is empty
  LC_ALL=C sort -t "$tab" -k2,2nr | # tab separator keeps paths with spaces intact
  cut -f1 |
  grep -E "$include_re" |
  grep -Ev "$exclude_re" |
  newest_included |
  feed_items &&
  echo '</channel>
</rss>'