On 2011-10-29 19:22, Christoph Lohmann wrote:
>>> I'm looking for a cli newsreader, suckless, less dependencies. Thanks!
>> cron + curl + xmlstarlet + md5 = lighter than rss2email.py
> Would you mind sharing your script for that?
Not exactly POSIX, so it's got 1/2 of running code, rough consensus ;)
rss() splits an rss feed, drops each article to disk with a filename =
the md5sum of the article, then prints all the new articles since last
cron run that match a basic grep.
things it could do better:
be POSIX
use ETAG / HEAD check
not use perl for the filter
filter has been a ridiculously useful script, being a hash of common
encoding transforms. It could be a sed script or written in C; anyone seen
or written something like it?
Patrick
# wcat URL
# Write the body of URL to stdout.
# Tries curl first; falls back to wget when curl is missing or exits non-zero.
# Returns 0 on success, otherwise the exit status of the last downloader
# tried (1 when neither curl nor wget is installed).
wcat() {
    # `command -v` is a shell builtin, unlike the external `which`.
    # -f makes curl exit non-zero on HTTP errors instead of printing the
    # error page, and -L follows redirects; both match what wget does by
    # default, so the two branches behave alike.
    if command -v curl > /dev/null 2>&1 && curl -fsL "$1"; then
        return 0
    fi
    if command -v wget > /dev/null 2>&1; then
        wget -q -O - "$1"
        return $?
    fi
    return 1
}
# rss_disk
# Read an XML feed on stdin and store every <description> element as a file
# in the current directory. The file name is the md5sum of the article text,
# so an article that is already on disk is not written again.
rss_disk() {
    # Marker line that xmlstarlet prints after each description.
    local split='#####'
    # Start from an empty article instead of inheriting a caller's variable.
    local article="" line d
    xmlstarlet sel -t -m "//*[local-name()='description']" -v '.' -n -o "$split" -n |
    while read -r line; do
        # -r keeps backslashes in the feed text intact. IFS is left at its
        # default so leading/trailing blanks are still trimmed, which keeps
        # the hashes of already-stored articles unchanged.
        if [[ "$line" != "$split" ]]; then
            # Still inside an article: join its lines with single spaces.
            article="${article} ${line}"
            continue
        fi
        # End of one article; skip descriptions that were empty.
        if [[ -n "$article" ]]; then
            d=$( md5sum <<< "$article" | sed -e 's/ .*//' )
            [[ -e "$d" ]] || printf '%s\n' "$article" > "$d"
        fi
        article=""
    done
}
# lgab = https://github.com/patrickhaller/linux-gnu-admin-bash
# rss DIR URL PATTERN
# Fetch the feed at URL into DIR (one file per article, via wcat | rss_disk),
# then print, rendered through lynx, every article file changed in the last
# 9 minutes whose HTML-decoded text matches the extended, case-insensitive
# regex PATTERN. Finally delete article files older than 7 days.
# Side effect: the calling shell's working directory becomes DIR.
# Returns non-zero without doing anything if DIR cannot be entered.
rss() {
    local dir=$1 url=$2 pattern=$3
    local f tmp
    # rss_disk writes into the current directory, so a failed cd must abort.
    cd -- "$dir" || return
    # Use the absolute path from here on so a relative DIR still works in
    # the find calls below.
    dir=$PWD
    wcat "$url" | rss_disk &
    # lgab_kill_after comes from lgab; presumably it limits the background
    # download to 30 seconds -- TODO confirm against that project.
    lgab_kill_after 30
    find "$dir" -type f -cmin -9 -print | while IFS= read -r f; do
        # lynx gets a file with an .html name next to the stored article.
        tmp="${f}.html"
        filter htmldecode < "$f" > "$tmp"
        # -e keeps a pattern that starts with '-' from being read as an option.
        grep -qEi -e "$pattern" "$tmp" && lynx -dump "$tmp"
        /bin/rm "$tmp"
    done
    find "$dir" -type f -ctime +7 -delete
}
# filter
#!/usr/bin/perl
# filter - apply one named text transform to every input line.
#
# Usage: filter ACTION [FILE...]
#
# Reads the named files (or standard input when none are given) line by line
# and writes the transformed text to standard output. ACTION is one of the
# keys of %block below. An unknown or missing ACTION aborts with a usage
# message instead of silently discarding the input.
use strict;
use warnings;
use MIME::Base64;
use CGI ':standard';
use HTML::Entities;

my $action = shift;

# Patterns used by the extracting actions.
my %re = (
    email => qr/ <? \S+ \@ \S+ >? /x,
    url   => qr/ [^\"\']+ /x,
);
my $email_re = $re{email};
my $url_re   = $re{url};

# One handler per action. Each handler works on the current line in $_.
my %block = (
    # Print the first address-like token of the line, if any.
    email => sub { print $1 . "\n" if /($email_re)/i },

    # Print the target of the first quoted href attribute, if any.
    url => sub {
        print $1 . "\n" if /href\s*=\s*(?:\'|\")($url_re)(?:\'|\")/i;
    },

    # Decode lines that start with a long run of base64 characters and pass
    # every other line through unchanged.
    base64 => sub {
        if (/^[a-zA-Z0-9\+\/]{78,}/) {
            print MIME::Base64::decode_base64($_);
        }
        else {
            print;
        }
    },

    urlencode  => sub { print CGI::escape($_) },
    htmlencode => sub { print HTML::Entities::encode_entities($_) },
    htmldecode => sub { print HTML::Entities::decode_entities($_) },

    # Strip carriage returns.
    crlf => sub { s/\r//g; print },
);

my $transform = defined $action ? $block{$action} : undef;
if ( !$transform ) {
    die 'usage: filter {' . join( '|', sort keys %block ) . "} [file...]\n";
}

# Stream the input one line at a time rather than loading it all first.
while (<>) {
    $transform->();
}
Received on Sun Oct 30 2011 - 02:31:13 CEST