package require uri
proc geturl_followRedirects {url args} {
array set URI [::uri::split $url] ;# Need host info from here
while {1} {
set token [eval [list http::geturl $url] $args]
if {![string match {30[1237]} [::http::ncode $token]]} {return $token}
array set meta [set ${token}(meta)]
if {![info exist meta(Location)]} {
return $token
}
array set uri [::uri::split $meta(Location)]
unset meta
if {$uri(host) == ""} { set uri(host) $URI(host) }
# problem w/ relative versus absolute paths
set url [eval ::uri::join [array get uri]]
}
}MAK Notes that function is not safe against infinite looping redirects (as might happen, for example, if a server is set up with an ErrorDocument page but is misconfigured such that it is forbidden as well).Easy to fix: replace "while {1}" by "foreach x {1 2 3 4 5}" and the loop becomes bounded.
Paul Walton: I've seen instances where the header names are all lowercase (eg., "location" instead of "Location"). This was on a major website, and may have also been done to prevent scraping.You could just string tolower the whole meta array.
The following does not resolve, but handles correctly the -channel option EF.
proc ::http::geturl_followRedirects {url args} {
while {1} {
set token [eval [list http::geturl $url] $args]
switch -glob -- [http::ncode $token] {
30[1237] {
if {[catch {array set OPTS $args}]==0} {
if { [info exists OPTS(-channel)] } {
seek $OPTS(-channel) 0 start
}
}
}
default { return $token }
}
upvar #0 $token state
array set meta [set ${token}(meta)]
if {![info exist meta(Location)]} {
return $token
}
set url $meta(Location)
unset meta
}
}
