ref: bd5bce727abff8a02a61685f6ee2a199f875557e
dir: /bin/gr/
#!/bin/rc
# Parse RSS feeds from Google Reader bundles into BULGE blog posts.
# If a post with a matching <link> already exists, no new post will
# be created for that <item>.  Accordingly, the gr script may run
# slowly for sites with a large number of existing posts: every stored
# link file is re-read for each post in the feed.
# Tags are the bundle's base tag plus whatever get_tags adds after
# matching the post's link against its table of known sources.
#
# usage: gr bundle
#	bundle is one of: architecture comics friends tech

rfork en

# Select feed URL, destination site and base tag list from the bundle name.
switch($1){
case architecture
	feed='http://pipes.yahoo.com/pipes/pipe.run?_id=6e361b590b57934fb1e7c4e29339d619&_render=rss'
	site=read.stanleylieber.com
	tags=(architecture)
case comics
	feed='http://pipes.yahoo.com/pipes/pipe.run?_id=6bc617a6b20aafd526affafc9a28a5d5&_render=rss'
	site=read.stanleylieber.com
	tags=(comics)
case friends
	feed='https://www.google.com/reader/bundle/user%2F08193524211692385241%2Fbundle%2Ffriends'
	site=read.stanleylieber.com
	tags=(friends)
case tech
	feed='http://pipes.yahoo.com/pipes/pipe.run?_id=6dd49be6e34a6871db9bcfc74d4b36b1&_render=rss'
	site=read.stanleylieber.com
	tags=(tech)
case *
	# >[1=2] sends the usage message to standard error.
	echo usage: `{basename $0} '[ ... ]' >[1=2]
	exit usage
}

file=/tmp/gr.$1		# scratch prefix; the fetched feed lives in $file.work
werc=/usr/sl/www/werc	# root of the werc tree that holds sites/$site/src

# Pick the fetch command.  /boot/factotum is used as the "running on
# Plan 9" probe.  cmd is kept as a list so that the curl fallback expands
# to two words (curl, -s); as a single quoted string it would be looked
# up as one program literally named 'curl -s'.
if(test -f /boot/factotum)
	cmd=hget
if not
	cmd=(curl -s)

# f path ...: print every path that du -a reports under the arguments,
# with the leading size column removed.
# The bracket expression holds a space and a tab so that either
# separator in du's output is accepted.
fn f{
	du -a $* | sed 's/^.*[ 	]//g'
}

# get_feed: download $feed into $file.work, then rewrite that file in
# place with sam so that each post occupies exactly one line.
# The sam script, in order:
#   - decodes the numeric and named character references for
#     " & ' , - . / : ; < = > _ |
#     NOTE(review): the reference names on the left-hand sides were
#     reconstructed (the pairs of duplicate rules for ", &, < and > are
#     the numeric and named forms) -- confirm against a live feed;
#   - deletes everything up to <div id="items"> and everything from
#     <div id="sidebar"> onward;
#   - marks the end of each item with HJDIVIDER, joins the whole file
#     into one line, and turns each marker back into a newline;
#   - drops the first line, writes the file and quits.
# sam's output is discarded: fd 2 goes to /dev/null and fd 1 is then
# pointed at fd 2.
fn get_feed{
	$cmd $feed >$file.work
	{
		echo '
,s/\&#34;/\"/g
,s/\&quot;/\"/g
,s/\&#38;/\&/g
,s/\&amp;/\&/g
,s/\&#39;/\''/g
,s/\&#44;/,/g
,s/\&#45;/-/g
,s/\&#46;/\./g
,s/\&#47;/\//g
,s/\&#58;/:/g
,s/\&#59;/;/g
,s/\&#60;/</g
,s/\&lt;/</g
,s/\&#61;/=/g
,s/\&#62;/>/g
,s/\&gt;/>/g
,s/\&#95;/_/g
,s/\&#124;/\|/g
,/<div id="items">/d
/<div id="sidebar">/,d
,s/<h2 class="item-title">/\n<h2 class="item-title">/g
,s/<div class="item">.*$/HJDIVIDER/g
,s/\n//g
,s/HJDIVIDER/\n/g
1d
w
q
'
		echo
	} | sam -d $file.work >[2]/dev/null >[1=2]
}

# get_tags: print, space separated and without a trailing newline, the
# bundle's base $tags followed by any tags implied by $a_link.
# Only the first case whose pattern matches $a_link is applied, so more
# specific patterns must precede more general ones (*rhizome-fp* before
# *rhizome*).  Patterns such as *eforemario* or *ibuya246* omit the first
# letter, so they match regardless of how that letter is capitalised.
# parse_posts calls this inside `{ }, so the assignments to tags made
# here do not carry over from one post to the next.
# NOTE(review): the tag names vwork (for *vvork*) and bob_greengerger
# (for *bobgreenberger*) look like typos; they are left as they are
# because renaming would diverge from tags already stored under the
# old spelling -- confirm before changing.
fn get_tags{
	switch($a_link){
	case *1oct1993*
		tags=($tags 1oct1993)
	case *9front*
		tags=($tags software plan9 9front)
	case *amyearles* *pushedunder* *seaglass* *woolandwater*
		tags=($tags amy_earles)
	case *animenewsnetwork*
		tags=($tags telescreen anime)
	case *spikejapan*
		tags=($tags japan comics manga telescreen anime)
	case *ArchDaily* *archdaily*
		tags=($tags archdaily architecture)
	case *bldgblog*
		tags=($tags bldgblog architecture)
	case *kazuyosejima*
		tags=($tags japan architecture)
	case *Minimalissimo*
		tags=($tags minimalissimo design architecture)
	case *ArtFCity* *ArtFagCity* *artfagcity*
		tags=($tags artfagcity art)
	case *rhizome-fp*
		tags=($tags rhizome-fp art)
	case *rhizome*
		tags=($tags rhizome art)
	case *starwarsmodern*
		tags=($tags starwarsmodern art)
	case *tokyoartbeat*
		tags=($tags tokyoartbeat japan art)
	case *trendbeheer*
		tags=($tags trendbeheer art)
	case *ValentinaTanni*
		tags=($tags valentinatanni art)
	case *vvork*
		tags=($tags vwork art)
	case *auriea* *tale-of-tales* *taleoftales*
		tags=($tags auriea)
	case *basscomm* *closeoutwarrior* *crummysocks* *gamerrelocationproject* *protipoftheday* *PushButtonB* *pushbuttonb*
		tags=($tags video_games basscomm)
	case *benjaminmarra*
		tags=($tags comics benjamin_marra)
	case *boingboing*
		tags=($tags boingboing)
	case *bushinbooks* *henka*
		tags=($tags budo)
	case *alexaanddave* *CEREBUS* *Cerebus* *cerebus* *davesim* *gerhard*
		tags=($tags comics cerebus gerhard)
	case *coilhouse*
		tags=($tags coilhouse)
	case *arche-arc*
		tags=($tags arche comics)
	case *blaiselarmee*
		tags=($tags blaise_larmee comics)
	case *bleedingcool*
		tags=($tags bleedingcool comics)
	case *bobgreenberger*
		tags=($tags bob_greengerger comics)
	case *coldheatcomics*
		tags=($tags coldheat comics)
	case *comicbookresources*
		tags=($tags cbr comics)
	case *comicsbeat*
		tags=($tags comicsbeat comics)
	case *ComicsComics* *comicscomics*
		tags=($tags comicscomics comics)
	case *coveredblog*
		tags=($tags coveredblog comics)
	case *dcfifty-too*
		tags=($tags dcfifty-too comics)
	case *Destructoid* *destructoid*
		tags=($tags destructoid video_games)
	case *economist.com*
		tags=($tags economist)
	case *ferrandelgado*
		tags=($tags ferran_delgado comics)
	case *eddiecampbell*
		tags=($tags eddie_campbell comics)
	case *factualopinion*
		tags=($tags factualopinion comics)
	case *floating_world* *floatingworld*
		tags=($tags floating_world comics)
	case *frankmiller*
		tags=($tags frank_miller comics)
	case *humancolor*
		tags=($tags humancolor comics)
	case *jerkcity*
		tags=($tags jerkcity comics)
	case *newconstructionblog*
		tags=($tags newconstruction manga comics)
	case *ohdannyboy*
		tags=($tags ohdannyboy comics)
	case *pulphope*
		tags=($tags pulphope comics)
	case *pwbeat*
		tags=($tags pwbeat comics)
	case *reliablecomics*
		tags=($tags reliablecomics comics)
	case *reneefrench*
		tags=($tags renee_french comics)
	case *rickveitch*
		tags=($tags rick_veitch comics)
	case *smbc-comics*
		tags=($tags smbc comics)
	case *studygroup*
		tags=($tags studygroup comics)
	case *xkcd*
		tags=($tags xkcd comics)
	case *bowiesongs* *DavidBowie* *davidbowie*
		tags=($tags music david_bowie)
	case *designboom*
		tags=($tags designboom design)
	case *dezeen*
		tags=($tags dezeen design)
	case *infosthetics*
		tags=($tags infosthetics design)
	case *inhabitat*
		tags=($tags inhabitat architecture design)
	case *luigicolani*
		tags=($tags luigicolani design)
	case *mocoloco*
		tags=($tags mocoloco design)
	case *sydmead*
		tags=($tags sydmead design)
	case *dzima*
		tags=($tags dzima)
	case *bbcicecream*
		tags=($tags bbcicecream fashion)
	case *DamStyle* *damstyle*
		tags=($tags damstyle fashion)
	case *facehunter*
		tags=($tags facehunter fashion)
	case *StilInBerlin*
		tags=($tags germany fashion)
	case *jstreets*
		tags=($tags jstreets japan fashion)
	case *Stilinberlin*
		tags=($tags stilinberlin germany fashion)
	case *stylefromtokyo*
		tags=($tags stylefromtokyo japan fashion)
	case *tokyofashion*
		tags=($tags tokyofashion japan fashion)
	case *flames.gif* *flamesgif*
		tags=($tags flames.gif)
	case *contemporary-home-computing*
		tags=($tags software flames.gif)
	case *kurzweil*
		tags=($tags kurzweil future)
	case *longnow*
		tags=($tags longnow future)
	case *OpenTheFuture*
		tags=($tags openthefuture future)
	case *golang* *blog.nella.org*
		tags=($tags golang)
	case *googlepluses*
		tags=($tags google)
	case *news.ycombinator.com*
		tags=($tags hackernews hack)
	case *seanbonner*
		tags=($tags sean_bonner hack)
	case *banriman*
		tags=($tags banriman japan)
	case *japansubculture*
		tags=($tags japansubculture japan)
	case *jeansnow*
		tags=($tags jeansnow japan)
	case *Kotaku* *kotaku*
		tags=($tags kotaku video_games)
	case *eforemario*
		tags=($tags before_mario video_games)
	case *nakakobooks*
		tags=($tags books nakakobooks japan)
	case *ozawamaria*
		tags=($tags maria_ozawa japan)
	case *ibuya246*
		tags=($tags shibuya246 japan)
	case *shisaku.blogspot.com*
		tags=($tags shisaku japan)
	case *jimshooter*
		tags=($tags comics jim_shooter)
	case *LettersOfNote* *lettersofnote*
		tags=($tags letters)
	case *nasa*letters.rss*
		tags=($tags nasa letters)
	case *hellodamage* *manganews* *naokiurasawa* *samehat*
		tags=($tags comics manga)
	case *mangatraders*
		tags=($tags p2p comics manga)
	case *hortonheardawho*
		tags=($tags hortonheardawho flickr nasa mars)
	case *me-vs-gutenberg* *mevsgutenberg*
		tags=($tags martin_sand)
	case *marxy*
		tags=($tags marxy)
	case *etamodern*
		tags=($tags metamodern)
	case *aviationintel*
		tags=($tags aviationintel mil)
	case *aviationweek*
		tags=($tags aviationweek mil)
	case *codeonemagazine*
		tags=($tags codeonemagazine mil)
	case *geimint*
		tags=($tags geimint mil)
	case *bjork*
		tags=($tags bjork music)
	case *momus* *mrstsk*
		tags=($tags books momus music)
	case *toog*
		tags=($tags toog music)
	case *nasa.gov*
		tags=($tags space nasa)
	case *mongoliad*
		tags=($tags neal_stephenson)
	case *gaiman*
		tags=($tags comics neil_gaiman)
	case *nin.com* *feeds.nin.com*
		tags=($tags music nin)
	case *nix-os* *syssoftware*
		tags=($tags plan9 nix)
	case *bsdly* *OPENBSD* *OpenBSD* *openbsd* *scientist-home* *undeadly*
		tags=($tags software openbsd)
	case *godownmatthew* *mysticmilk* *petetoms*
		tags=($tags pete_toms)
	case *Pitchfork* *pitchfork*
		tags=($tags music pitchfork)
	case *9gridchan* *cat-v* *maht0x0r* *Plan9* *plan9*
		tags=($tags software plan9)
	case *FlauntTalks* *prince.org* *purpleinterviews* *wendyandlisa*
		tags=($tags music prince)
	case *commandcenter* *rob_pike*
		tags=($tags golang plan9 rob_pike)
	case *prometheus*
		tags=($tags telescreen prometheus)
	case *reddit.com*
		tags=($tags reddit)
	case *swtch.com*
		tags=($tags golang plan9 rsc)
	case *bunniestudios*
		tags=($tags bunniestudios security)
	case *jwz*
		tags=($tags jwz security)
	case *Krebs* *krebs*
		tags=($tags krebs security)
	case *scarybeastsecurity*
		tags=($tags scarybeast security)
	case *schneier*
		tags=($tags bruce_schneier security)
	case *chinchillakwak* *skwak*
		tags=($tags skwak)
	case *slashdot*
		tags=($tags slashdot)
	case *stanleylieber*
		tags=($tags stanleylieber)
	case *fastcompany*
		tags=($tags fastcompany tech)
	case *danharmon*
		tags=($tags danharmon telescreen)
	case *mindlessones*
		tags=($tags mindlessones telescreen)
	case *tcj.com*
		tags=($tags comics tcj)
	case *TEDblog* *ted.com*
		tags=($tags ted)
	case *ticom*
		tags=($tags ticom security)
	case *orrentfreak*
		tags=($tags p2p torrentfreak)
	case *ultra*culture*
		tags=($tags ultraculture)
	case *kleinletters*
		tags=($tags comics todd_klein)
	case *kennercollector*
		tags=($tags kennercollector toys)
	case *plaidstallions*
		tags=($tags plaidstallions toys)
	case *shojikawamori*
		tags=($tags shojikawamori japan toys)
	case *ruinedcartridge*
		tags=($tags basscomm video_games)
	}
	echo -n $tags
}

# parse_posts: walk $file.work (one post per line, visited in reverse
# sort order) and create a numbered directory under
# $werc/sites/$site/src for every post whose link is not already stored.
# Each new directory receives title, date, link and body files plus one
# empty file per tag under tags/; every tag is also appended to the
# site-wide $werc/sites/$site/tags index.
# Fields that cannot be extracted are set to the literal word "fake".
fn parse_posts{
	# Split command output on newlines only, so that each line of the
	# work file is one $post and each `{ } below yields a list of lines.
	ifs='
' {
		for(post in `{sort -r $file.work}){
			if(! ~ $post ''){
				# Title: text of the item-title heading, falling back to the
				# item-info block, then to "fake".
				a_title=`{echo $post | sed 's/^.*<h2 class="item-title"><div class="">//g' | sed 's/<div class="item-info">.*$//g' | htmlfmt}
				if(~ $#a_title 0)
					a_title=`{echo $post | sed 's/^.*<h2 class="item-title"><div class="">//g' | sed 's/<div class="item-info">.*$//g' | sed 's/^.*<div class="item-info">//g' | sed 's/<div class="item-annotations">.*$//g' | htmlfmt}
				if(~ $#a_title 0)
					a_title=`{echo $post | sed 's/^.*<div class="item-info">//g' | sed 's/<div class="item-annotations">.*$//g' | htmlfmt}
				if(~ $#a_title 0)
					a_title=fake

				# Link: four attempts, each used only when the previous one
				# produced nothing starting with http.
				#  1. bracketed URL printed by htmlfmt -a for the title heading
				#  2. raw href attribute inside the title heading
				#  3. bracketed URL printed by htmlfmt -a for the item-info block
				#  4. raw href attribute inside the item-info block
				a_link=`{echo $post | sed 's/^.*<h2 class="item-title"><div class="">//g' | sed 's/<div class="item-info">.*$//g' | htmlfmt -a | sed 's/^.*\[//g' | sed 's/\].*$//g' | sed 's/ .*$//g'}
				if(~ $#a_link 0 || ! ~ $"a_link http*)
					a_link=`{echo $post | sed 's/^.*<h2 class="item-title"><div class="">//g' | sed 's/<div class="item-info">.*$//g' | sed 's/^.*<a href="//g' | sed 's/\".*$//g' | sed 's/^.*http/http/g' | sed 's/ .*$//g'}
				if(~ $#a_link 0 || ! ~ $"a_link http*)
					a_link=`{echo $post | sed 's/^.*<div class="item-info">//g' | sed 's/<div class="item-annotations">.*$//g' | htmlfmt -a | sed 's/^.*\[//g' | sed 's/\].*$//g' | sed 's/ .*$//g'}
				if(~ $#a_link 0 || ! ~ $"a_link http*)
					a_link=`{echo $post | sed 's/^.*<div class="item-info">//g' | sed 's/<div class="item-annotations">.*$//g' | sed 's/^.*<a href="//g' | sed 's/" class="f">.*$//g' | sed 's/\".*$//g' | sed 's/^.*http/http/g' | sed 's/ .*$//g'}
				# Keep only the first candidate line.
				a_link=$a_link(1)
				if(~ $#a_link 0 || ~ $"a_link '' || ! ~ $"a_link http*)
					a_link=fake
				if(! ~ $#a_link 1)
					a_link=`{echo $a_link | awk '{print $1;}'}

				a_date=`{date -n}

				# Body: everything between the item-body div and the clear div.
				a_body=`{echo $post | sed 's/^.*<div class="item-body">//g' | sed 's/<div class="clear">.*$//g'}
				if(~ $#a_body 0)
					a_body=fake

				# Next post id: numerically largest existing entry plus one,
				# or 1 when that yields nothing; then step past any directory
				# that already exists.
				a_id=`{echo `{ls -p $werc/sites/$site/src | sort -n | tail -1}^+1 | bc}
				if(~ $#a_id 0)
					a_id=1
				while(test -d $werc/sites/$site/src/$a_id)
					a_id=`{echo $a_id^+1 | bc}

				# Create the post only if no stored link file holds this link.
				if(! ~ $"a_link `{cat $werc/sites/$site/src/*/link}){
					mkdir -p $werc/sites/$site/src/$a_id/tags	# big fat race
					echo $"a_title >$werc/sites/$site/src/$a_id/title
					echo $"a_date >$werc/sites/$site/src/$a_id/date
					echo $"a_link >$werc/sites/$site/src/$a_id/link
					echo $"a_body '</a></li></ul>' >$werc/sites/$site/src/$a_id/body
					# get_tags prints its tags separated by spaces, so split
					# on spaces for this loop only.
					ifs=' ' {
						for(j in `{get_tags}){
							>$werc/sites/$site/src/$a_id/tags/$j
							echo $a_id/tags/$j >>$werc/sites/$site/tags
						}
					}
				}

				# On Plan 9, set the t mode bit on everything under the post.
				if(test -f /boot/factotum)
					for(i in `{f $werc/sites/$site/src/$a_id})
						chmod +t $i
			}
		}
	}
}

# Start webfs first when both /boot/factotum and /rc/bin/hget exist.
# NOTE(review): presumably because that hget is an rc script that talks
# to webfs -- confirm.
if(test -f /boot/factotum && test -f /rc/bin/hget)
	webfs
get_feed
parse_posts