Updated 2014-01-19 21:13:14 by RLE

This is a small program I wrote to monitor changes in the RSS feeds of various websites and email those changes to me at work. I currently am running it as a cron job on debian linux with Tcl 8.4.6. BDK

The first file, rss.tcl, is a very minimal package to read rss feeds. Yes, I know such a package already exists, but it helped me get into the xml package.

I don't think any of the RSS packages have made it into tcllib or the other major distributed extensions, have they? Perhaps one of these could be considered for tcllib?

schlenk The basic problem with XML stuff for the tcllib is, that there isn't any pure Tcl xml package inside tcllib ( a great lacking IMHO).
 #!/usr/local/bin/tclsh
 package require Tcl 8.4
 package require struct 2.0
 package require xml 2.6
 package require snit 0.9

 package provide rss 1.0

 namespace eval ::rss {
    variable parser
    variable parserStack
    variable channelObject

    variable currentCmds
    array set currentCmds \
        [list \
             elementStart [list [namespace current]::XML.StartRSS] \
             elementEnd [list [namespace current]::XML.EndRSS] \
             characterData {} \
            ]
 }

 proc ::rss::Parser.NewState {elementStart elementEnd characterData} {
    variable parser
    variable parserStack
    variable currentCmds
    variable channel

    $parserStack push \
        [list \
             $currentCmds(elementStart) \
             $currentCmds(elementEnd) \
             $currentCmds(characterData) \
             ]

    set currentCmds(elementStart) $elementStart
    set currentCmds(elementEnd) $elementEnd
    set currentCmds(characterData) $characterData

    return
 }

 proc ::rss::Parser.PreviousState {} {
    variable parser
    variable parserStack
    variable currentCmds
    
    foreach {currentCmds(elementStart) currentCmds(elementEnd) currentCmds(characterData)} [$parserStack pop] {break}

    return
 }

 proc ::rss::Wrapper.ElementStart {name attlist args} {
    variable currentCmds

    if {$currentCmds(elementStart)!={}} {
        set code [catch {uplevel \#0 $currentCmds(elementStart) [list $name $attlist] $args} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::Wrapper.ElementEnd {name args} {
    variable currentCmds

    if {$currentCmds(elementEnd)!={}} {
        set code [catch {uplevel \#0 $currentCmds(elementEnd) [list $name] $args} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::Wrapper.CharacterData {data} {
    variable currentCmds

    if {$currentCmds(characterData)!={}} {
        set code [catch {uplevel \#0 $currentCmds(characterData) [list $data]} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::parse {data} {
    variable parser
    variable parserStack
    variable channel
    variable currentCmds

    set parser [::xml::parser ]
    set parserStack [::struct::stack]
    set channel [Channel %AUTO%]

    $parser configure \
        -elementstartcommand [namespace current]::Wrapper.ElementStart \
        -elementendcommand [namespace current]::Wrapper.ElementEnd \
        -characterdatacommand [namespace current]::Wrapper.CharacterData

    if {[catch {$parser parse $data} errorMsg]} {
       # Caught an error, destroy the channel
       $channel destroy
       set channel {}

       # Reset the stack
       while {[$parserStack size] != 0} {
          Parser.PreviousState
       }

    $parser free
    $parserStack destroy

    if {$channel == {}} {
       return -code error $errorMsg
    } else {
       return $channel
    }
 }

 proc ::rss::XML.StartRSS {name attlist args} {
    variable channel

    switch -- $name {
        channel {
            Parser.NewState \
                [list [namespace current]::XML.Channel $channel] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
        item {
            set obj [Item %AUTO%]
            $channel AddItem $obj

            Parser.NewState \
                [list [namespace current]::XML.Item $obj] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
    }

    return
 }

 proc ::rss::XML.EndRSS {name args} {
    return
 }

 proc ::rss::XML.Channel {obj name attlist args} {
    switch -- $name {
        title {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable title]]
        }
        link {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable link]]
        }
        description {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable description]]
        }
        item {
            set item [Item %AUTO%]
            $obj AddItem $item

            Parser.NewState \
                [list [namespace current]::XML.Item $item] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
        default {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
    }

    return
 }

 proc ::rss::XML.Item {obj name attlist args} {
    switch -- $name {
        title {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable title]]
        }
        link {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable link]]
        }
        description {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable description]]
        }
        default {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
    }

    return
 }

 proc ::rss::XML.ElementStart {name attlist args} {
    Parser.NewState \
        [list [namespace current]::XML.ElementStart] \
        [list [namespace current]::XML.ElementEnd] \
        {}

    return
 }

 proc ::rss::XML.ElementEnd {name args} {
    Parser.PreviousState

    return
 }

 proc ::rss::XML.CharacterData {var data} {
    upvar \#0 $var myVar

    append myVar $data

    return
 }

 ::snit::type ::rss::Channel {
    variable title {}
    variable link {}
    variable description {}
    variable items {}

    destructor {
        foreach item $items {
            $item destroy
        }

        return
    }

    method title {} {
        return $title
    }

    method link {} {
        return $link
    }

    method description {} {
        return $description
    }

    method items {} {
        return $items
    }

    method GetVariable {var} {
        return [varname $var]
    }

    method AddItem {item} {
        lappend items $item
    }
 }

 ::snit::type ::rss::Item {
    variable title {}
    variable link {}
    variable pubDate {}
    variable description {}

    method title {} {
        return $title
    }

    method link {} {
        return $link
    }

    method pubDate {} {
        return $pubDate
    }

    method description {} {
        return $description
    }

    method GetVariable {var} {
        return [varname $var]
    }
 }

This is the main body of the program, rss_monitor.tcl
 #!/usr/local/bin/tclsh
 lappend auto_path .

 package require Tcl 8.4
 package require http 2.4
 package require mime 1.3
 package require smtp 1.3
 package require md5 1.4
 package require rss 1.0

 #Reads the options file.
 proc loadOptions {file} {
    #Initialize the interpreter which executes the contents
    #of the options file.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias email loadOptions.email

    #Read the options file.
    set inFile [open $file r]
    $interp eval [read $inFile]
    close $inFile

    interp delete $interp

    return
 }

 proc loadOptions.email {email data} {
    #Initialize the interpeter which executes the contents of the
    #data variable.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias rss loadOptions.rss $email

    $interp eval $data

    interp delete $interp

    return
 }

 proc loadOptions.rss {email url} {
    #Store the url in the global options array.
    if {![info exists ::options(email,$url)]} {
        lappend ::options(url) $url
    }
    lappend ::options(email,$url) $email

    return
 }

 #Loads the MD5 hash records.
 proc loadHashes {file} {
    #Create the interpeter.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias hash loadHashes.hash

    #Open the file and read in the data.
    set inFile [open $file r]
    $interp eval [read $inFile]
    close $inFile

    interp delete $interp

    return
 }

 proc loadHashes.hash {url hash} {
    #Store the hash in the global options array.
    set ::options(hash,$url) $hash

    return
 }

 #Saves the hashes for all of the url's.
 proc saveHashes {file} {
    set outFile [open $file w]

    foreach url $::options(url) {
        if {[info exists ::options(hash,$url)]} {
            puts $outFile [list hash $url $::options(hash,$url)]
        }
    }

    close $outFile
 }
 
 #Do the work of parsing RSS feeds, generating and comparing hashes, sending messages.
 proc generateMessages {} {
    foreach url $::options(url) {
        #Retrieve the current hash for the URL.
        if {[info exists ::options(hash,$url)]} {
            set hash $::options(hash,$url)
        } else {
            set hash {}
        }

        #Retrieve the URL.
        set token [::http::geturl $url -timeout 2000]
        if {[::http::status $token] != "ok"} {
            puts "Could not retrieve data for $url"
            ::http::cleanup $token
            continue
        }
        set data [::http::data $token]
        ::http::cleanup $token

        #Parse the RSS feed.
        set channel [::rss::parse $data]

        set msgBody {}

        #Generate the msg body.
        append msgBody "[$channel title] ([$channel link])\n"
        append msgBody "[$channel description]\n\n"
        foreach item [$channel items] {
            append msgBody "----------------------------------------------------------------\n"
            append msgBody "[$item title] ([$item link])\n"
            append msgBody "\n"
            append msgBody "[$item description]\n"
            append msgBody "----------------------------------------------------------------\n"
            append msgBody "\n"
        }

        #Generate the new hash.
        #I thought it would be more efficient to generate the hash prior to parsing and
        #creating the message, but some sites change the comments within their XML data
        #to reflect when the feed was generated, like sourforge.net for example,
        #so this is a quick and simple fix.
        set newHash [::md5::md5 $msgBody]

        #If the hashes do not match, then the site has changed, 
        #so send out the messages.
        if {$newHash != $hash} {

            #Create the MIME message.
            set mime [::mime::initialize -canonical text/plan -string $msgBody]
            ::mime::setheader $mime Subject "[$channel title] has been updated"
            
            foreach email $::options(email,$url) {
                ::smtp::sendmessage $mime \
                    -recipients $email \
                    -originator "[email protected]"
            }

            #Destroy (deallocate) the MIME message.
            ::mime::finalize $mime

            #Destroy (deallocate) parsed data.
            $channel destroy
        }

        #Store the new hash in the options array.
        set ::options(hash,$url) $newHash
    }
 }

 #The main program.

 #Locations of the configuration files.
 set configFile "rss_config"
 set hashFile "rss_hashes"

 loadOptions $configFile

 if {[file exists $hashFile]} {
    loadHashes $hashFile
 }

 generateMessages

 saveHashes $hashFile

And finally, and example configuration file, rss_config
 email [email protected] {
        #Tcl'ers Wiki
        rss http://wiki.tcl.tk/rss.xml

        #Slashdot
        rss http://slashdot.org/index.rss

        #Sourceforge
        rss http://sourceforge.net/export/rss2_sfnews.php?feed
 }

 email [email protected] {
        #Sourceforge
        rss http://sourceforge.net/export/rss2_sfnews.php?feed          
 }