#! /usr/bin/env tclsh
package require tax
namespace eval rss {
namespace eval channels {
}
namespace eval items {
}
}
proc rss::__replace_entities {text} {
return [string map [list " " " " ">" ">" "<" "<" "&" "&"] $text]
}
proc rss::__strip_html {text} {
# We replace entities here (i.e., twice) because HTML-inside-XML will have
# the HTML entities escaped twice.
return [__replace_entities [regsub -all -- {<[^>]*>} $text ""]]
}
proc rss::__tax_add_to_object {obj tag isClose isSelfClosing properties body} {
upvar #0 $obj rssobj
set channelid [namespace tail $obj]
set tag [string tolower [string trim $tag]]
if {$tag == "docstart"} {
set rssobj(parent) [list]
namespace eval ::rss::items::$channelid {}
}
if {$tag == "docstart" || $tag == "rss"} {
return
}
if {[string index $tag 0] == "?"} {
return
}
if {$isClose && !$isSelfClosing} {
if {$tag == "item"} {
# We close tag items twice, because we add a fake open with the tag id
set rssobj(parent) [lrange $rssobj(parent) 0 end-1]
}
set rssobj(parent) [lrange $rssobj(parent) 0 end-1]
return
}
lappend rssobj(parent) $tag
set parent [lindex $rssobj(parent) end-1]
switch -- $tag {
"item" {
if {$parent == "channel"} {
set lastusedid [namespace tail [lindex [lsort -dictionary -decreasing [info procs ::rss::items::${channelid}::*]] 0]]
if {$lastusedid == ""} {
set lastusedid 0
}
set id "::rss::items::${channelid}::[expr $lastusedid + 1]"
proc $id [list command [list obj $obj] [list id $id]] {
upvar #0 $obj rssobj
switch -- [string tolower $command] {
"title" {
set idx [list $id title]
}
"link" {
set idx [list $id link]
}
"description" {
set idx [list $id description]
}
"date" {
set idx [list $id pubdate]
}
}
if {![info exists idx]} {
return ""
}
if {![info exists rssobj($idx)]} {
return ""
}
return $rssobj($idx)
}
lappend rssobj(items) $id
lappend rssobj(parent) $id
}
}
"title" {
set rssobj([list $parent title]) [__strip_html [__replace_entities $body]]
}
"link" {
set rssobj([list $parent link]) [__replace_entities $body]
}
"description" {
set rssobj([list $parent description]) [__strip_html [__replace_entities $body]]
}
"pubdate" {
catch {
set body [clock scan $body]
}
set rssobj([list $parent pubdate]) $body
}
}
if {$isClose} {
# For self-closing tags
if {$tag == "item"} {
# We close tag items twice, because we add a fake open with the tag id
set rssobj(parent) [lrange $rssobj(parent) 0 end-1]
}
set rssobj(parent) [lrange $rssobj(parent) 0 end-1]
}
return
}
# Return ID
proc rss::parse {data} {
set lastusedid [namespace tail [lindex [lsort -dictionary -decreasing [info vars ::rss::channels::*]] 0]]
if {$lastusedid == ""} {
set lastusedid 0
}
set id "::rss::channels::[expr $lastusedid + 1]"
upvar #0 $id rssobj
tax::parse [list ::rss::__tax_add_to_object $id] $data
proc $id [list command [list obj $id]] {
upvar #0 $obj rssobj
switch -- $command {
"items" {
set idx items
}
"description" {
set idx [list channel description]
}
"link" {
set idx [list channel link]
}
}
if {![info exists idx]} {
return ""
}
if {![info exists rssobj($idx)]} {
return ""
}
return $rssobj($idx)
}
return $id
}
proc rss::cleanup {id} {
if {[string match "::rss::channels::*" $id]} {
set channelid [namespace tail $id]
foreach proc [info procs ::rss::items::${channelid}::*] {
rename $proc ""
}
unset -nocomplain $id
}
return 1
}
package provide rss 0.1You'll notice that it is slightly inconsisent towards the middle.. I should have used a namespace under ::rss for both channels and items, but I didn't realize this until I didn't feel like changing it. Feel free to edit the above, or use it in your own code.
LV Do you have any examples to demonstrate?
Roy Keene Sure, a small example:
#! /usr/bin/env tclsh package require rss package require http set token [http::geturl "http://www.digg.com/rss/index.xmlA more complete example:"] set rssdata [http::data $token] http::cleanup $token set id [rss::parse $rssdata] foreach item [$id items] { puts "[clock format [$item date]]: [$item title]: [$item description] ([$item link])" }
#! /usr/bin/env tclsh
package require rss
package require http
package require Tk
proc gui_bg_update_news {newsobj} {
set rssfeeds [list {http://news.google.com/news?ned=us&topic=h&output=rss}
{http://rss.cnn.com/rss/cnn_world.rss}
]
http::config -useragent {Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.7e}
if {![info exists ::rss_newsitems]} {
set ::rss_newsitems [list]
}
foreach url $rssfeeds {
catch {
http::geturl $url -command gui_bg_update_news_data
}
}
after 30000 [list gui_bg_update_news_text $newsobj]
}
proc gui_bg_update_news_text {newsobj} {
if {[llength $::rss_newsitems] != 0} {
$newsobj delete 0 end
unset -nocomplain ::rss_newsitems_urls
foreach item [lsort -dictionary -index 0 $::rss_newsitems] {
set date [lindex $item 0]
set title [lindex $item 1]
set desc [lindex $item 2]
set link [lindex $item 3]
$newsobj insert end "$title"
set ::rss_newsitems_urls($title) $link
}
set ::rss_newsitems [list]
}
after 120000 [list gui_bg_update_news $newsobj]
}
proc gui_bg_update_news_data {token} {
if {[http::ncode $token] != "200"} {
if {$::DEBUG} {
puts "Error opening url: [http::ncode $token]"
}
http::cleanup $token
return
}
set rssdata [http::data $token]
if {[catch {
set id [rss::parse $rssdata]
set newsitems [list]
foreach item [$id items] {
set newitem [list [$item date] [$item title] [$item description] [$item link]]
if {[lsearch $::rss_newsitems $newitem] == -1} {
lappend ::rss_newsitems $newitem
}
}
} err]} {
if {[info exists $::DEBUG]} {
puts "Error in RSS feed update: $err"
puts "$::errorInfo"
}
}
http::cleanup $token
if {[info exists id]} {
rss::cleanup $id
}
}
proc load_rss_url {newsobj x y} {
set idx [$newsobj nearest $y]
set idxbbox [$newsobj bbox $idx]
set idx_starty [expr [lindex $idxbbox 1]]
set idx_endy [expr $idx_starty + [lindex $idxbbox 3]]
if {$y < ($idx_starty - 4)} {
return
}
if {$y > ($idx_endy + 4)} {
return
}
set title [$newsobj get $idx]
if {![info exists ::rss_newsitems_urls($title)]} {
return
}
set url $::rss_newsitems_urls([$newsobj get $idx])
puts "Loading URL: $url"
# XXX: TODO, Figure out how to actually load a URL across platforms
}
listbox .newsInfo -width 80
button .exit -text "Exit" -command exit
pack .newsInfo -expand 1 -fill both
pack .exit
bind .newsInfo <Double-1> [list load_rss_url .newsInfo %x %y]
after 1000 [list gui_bg_update_news .newsInfo]Category Internet

