Updated 2014-06-21 22:18:39 by RLE

if 0 {Richard Suchenwirth 2005-02-06 - This weekend fun project deals with a little RSS browser. You can type or paste the RSS URL (often with .xml extension) in the entry on top, and on <Return> the XML document is read and rendered in the upper text widget. Clicking on a link (blue, underlined) retrieves that page, and renders it in a very simple way (trying to omit all frames, images, ads, and whatever noise the page contained) in the lower text.

This is highly experimental, so even less warranties than usual, but it sure is great fun! Note: the umlaut entities in the html2txt proc are resolved when this page is rendered (on IE 5 at least), so best copy the source in Edit mode, if you care for umlauts :)

JSI 09feb05 This link shows the sourcefile readonly ;-) http://wiki.tcl.tk/13490.txt

Known issues:

See also A little RSS reaper, which makes an HTML page from an RSS feed and the pages it links to. }
 package require http
 package require uri
 package require Tk

 proc main argv {
    pack [entry .e -textvar e] -fill x
    bind .e <Return> {showRSS .x.t $e}
    pack [panedwindow .p -ori vert] -fill both -expand 1

    .p add [frame .x]
    pack [scrollbar .x.y -command ".x.t yview"] -fill y -side right
    pack [text .x.t -wrap word -yscrollc ".x.y set" -spacing1 3 \
        -padx 5 -pady 3 -height 10] \
        -fill both -expand 1 -side right
    .x.t tag config link -foreground blue -underline 1
    .x.t tag bind link <Enter> {.x.t config -cursor hand2}
    .x.t tag bind link <Leave> {.x.t config -cursor {}}
    .x.t tag bind link <ButtonRelease-1> {click .x.t .f.t}

    .p add [frame .f]
    pack [scrollbar .f.y -command ".f.t yview"] -fill y -side right
    pack [text .f.t -wrap word -yscrollc ".f.y set" -spacing1 3\
        -padx 5 -pady 3 -height 12] \
        -fill both -expand 1 -side right
    foreach i {red blue green3} {.f.t tag config $i -foreground $i}
    .f.t tag config title -font {Helvetica 11 bold}
    .f.t tag config bold -font "[.f.t cget -font] bold"
    focus .e
    raise .
    #set ::e {http://www.spiegel.de/schlagzeilen/rss/0,5291,,00.xml}
    set ::e http://www.tagesspiegel.de/feed/index.xml
    showRSS .x.t $::e
 }

if 0 {This proc is called when a RSS link is clicked. It uses the fact that the second tag is the URL itself:}
 proc click {rsswin textwin} {
    set url [lindex [$rsswin tag names insert] 1]
    $rsswin tag configure $url -foreground purple4
    showHTML $textwin $url
 }

if 0 {This renders a RSS URL with titles and links into the given text widget:}
 proc showRSS {w url} {
    $w delete 1.0 end
    upvar #0 [geturl_followRedirects $url] arr
    if ![info exists arr(body)] {set arr(body) "<html>not found :(</html>"}
    foreach {tag content} [html2txt $arr(body)] {
        switch -- $tag {
            <description> {set descr $content}
            </description> {$w insert end "$title - $descr\n"}
            <title> {set title $content}
            <link>  {set link $content}
            </item> {$w insert end " - " "" $title\n [list link $link]}
        }
    }
 }

if 0 {This is a crude HTML renderer, which uses a regexp to split the document into a tag nontag... sequence, and depending on the tag, renders the non-tag content in the given text widget:}
 proc showHTML {w url} {
    $w delete 1.0 end
    upvar #0 [geturl_followRedirects $url] arr
    foreach {tag content} [html2txt $arr(body)] {
        if {[string length $content]<20} continue
        if [regexp userAgent $content] continue
        switch -glob -- [string tolower $tag] {
            <title> {$w insert end $content title \n\n}
            <b>     {$w insert end $content bold \n}
            <div* - </div> - <p> - <!--* - </i> -
            <br* {$w insert end $content\n}
        }
        update
    }
 }
 proc html2txt {html} {
   set res {}
   set re {(<[^>]+>) *([^<>]*)}
   foreach {all tag content} [regexp -all -inline $re $html] {
      if {![regexp src= $content]} {
              lappend res [deblank $tag] [deblank $content]
      }
   }
   string map {
      Ü Ü ß ß ä ä ö ö ü ü „ ' “ '
      &auml; ä &ouml; ö &uuml; ü &szlig; ß
   } $res
 }
 proc deblank string {regsub -all {\s+} $string " "}

#-- This redirecting geturl is courtesy of KPV's http://wiki.tcl.tk/11831
 proc geturl_followRedirects {url args} {
    array set URI [::uri::split $url] ;# Need host info from here
    while {1} {
        set token [eval [list http::geturl $url] $args]
        if {![string match {30[1237]} [::http::ncode $token]]} {return $token}
        array set meta [set ${token}(meta)]
        if {![info exist meta(Location)]} {
            return $token
        }
        array set uri [::uri::split $meta(Location)]
        unset meta
        if {$uri(host) == ""} { set uri(host) $URI(host) }
        # problem w/ relative versus absolute paths
        set url [eval ::uri::join [array get uri]]
    }
 }

 main $argv

#-- little debugging helpers (the F1 part works on Windows and Mac only)
 bind . <Escape> {exec wish $argv0 &; exit}
 bind . <F1> {console show}

if 0 {

Category Toys | Category Internet | Arts and crafts of Tcl-Tk programming }

[metoto] - 2009-12-30 11:55:36

Hi again Richard, you are clever enough to write a parsing script (non php) which can make all Rss Feed links active on a given webpage? Can you send an example to me at [email protected] :) ...Allan.

[metoto] - 2009-12-30 11:56:48

By the way very good work you do here. :) have a great 2010