proc SplitIntoWords {block} {
# We need to split the block up into words but cannot use
# list operations as they throw away some significant
# quoting, and [split] ignores braces as it should.
# Therefore what we do is gradually build up a string out of
# whitespace separated strings. We cannot use [split] to
# split the block into whitespace separated strings as it
# throws away the whitespace which maybe important so we
# have to do it all by hand.
set words {}
set word ""
while {[string length $block]} {
# Look for the next group of whitespace characters.
if {[regexp -indices "\[ \t\n\]+" $block all]} {
# Remove the text leading up to and including the white space
# from the block.
set text [string range $block 0 [lindex $all 1]]
set block [string range $block [expr {[lindex $all 1] + 1}] end]
} else {
# Take everything up to the end of the block.
set text $block
set block {}
}
# Add the text to the end of the word we are building up.
append word $text
if { [catch {llength $word} length] == 0 && $length == 1} {
# The word is a valid list so add it to the list.
lappend words [string trim $word]
set word {}
}
}
# If the last word has not been added to the list then there
# is a problem.
if { [string length $word] } {
error "incomplete word \"$word\""
}
return $words
}
proc SplitIntoWordsStripComments {block} {
# We need to split the block up into words but cannot use
# list operations as they throw away some significant
# quoting, and [split] ignores braces as it should.
# Therefore what we do is gradually build up a string out of
# whitespace separated strings. We cannot use [split] to
# split the block into whitespace separated strings as it
# throws away the whitespace which maybe important so we
# have to do it all by hand.
set words {}
set word ""
set comment 0
while {[string length $block]} {
# Look for the next group of whitespace characters.
if {[regexp -indices "\[ \t\n\]+" $block all]} {
# Remove the text leading up to and including the white space
# from the block.
set text [string range $block 0 [lindex $all 1]]
set block [string range $block [expr {[lindex $all 1] + 1}] end]
} else {
# Take everything up to the end of the block.
set text $block
set block {}
}
# Add the text to the end of the word we are building up.
append word $text
# If the word is a comment then check to see whether it is
# complete yet.
if { $comment } {
set index [string first "\n" $word]
if { $index != -1 } {
# The comment has been terminated.
set comment 0
}
# Discard the part of the comment which has already been
# found, even if a whole comment has been found only white space
# could have come after the newline and that whitespace is not
# significant.
set word ""
} elseif { [regexp -indices "^\[ \t\n\]*#" $word all] } {
# The word starts with a hash so it is a comment, strip
# off the matched portion which could contain newline
# characters which would confuse the search for a terminating
# newline character.
set word [string range $word [lindex $all 1] end]
set index [string first "\n" $word]
if { $index == -1 } {
# The comment has not yet been terminated so keep looking
# for the comment.
set comment 1
}
# Discard the part of the comment which has already been
# found, even if a whole comment has been found only white space
# could have come after the newline and that whitespace is not
# significant.
set word ""
} elseif { [catch {llength $word} length] == 0 && $length == 1} {
# The word is a valid list so add it to the list.
lappend words [string trim $word]
set word {}
}
}
# If the last word has not been added to the list then there
# is a problem.
if { [string length $word] } {
error "incomplete word \"$word\""
}
return $words
}The function below is an improved version of the first function, SplitIntoWords. When the input to the first function is a string containing many words inside quotes, then the running time for the first function is O(N²) where N is the number of words in the input. For example, if the input is: {pattern {a b c d e}}, then the first function must parse the partial list:
{a
{a b
{a b c
...
{a b c d e}This type of input can be very common because the input is often a pattern action pairs where the action is a Tcl script containing many words and the pattern is a single word. The following function improves the running time by only making a list check when a word with a quote character ["{}] is seen. This can reduce the running time for the above example from O(N²) to O(N). However, the worst case running time is still O(N²) when every word inside the list also contains a quote character.proc SplitIntoWords {block} {
# We need to split the block up into words but cannot use
# list operations as they throw away some significant
# quoting, and [split] ignores braces as it should.
# Therefore what we do is gradually build up a string out of
# whitespace separated strings. We cannot use [split] to
# split the block into whitespace separated strings as it
# throws away the whitespace which maybe important so we
# have to do it all by hand.
set words {}
set word ""
while {[string length $block]} {
# Look for the next word containing a quote: " { }
if {[regexp -indices {[^ \t\n]*[\"\{\}]+[^ \t\n]*} \
$block all]} {
# Get the text leading up to this word, but not
# including this word from the block.
set text [string range $block 0 \
[expr {[lindex $all 0] - 1}]]
# Get the word with the quote
set wordWithQuote [string range $block \
[lindex $all 0] [lindex $all 1]]
# Remove all text up to and including the word from the
# block.
set block [string range $block \
[expr {[lindex $all 1] + 1}] end]
} else {
# Take everything up to the end of the block.
set text $block
set wordWithQuote {}
set block {}
}
if {$word != {}} {
# If we saw a word with quote before, then there is a
# partial list starting with that word. In this case, add
# the text and the current word to this partial list.
append word $text $wordWithQuote
} else {
# Add the text to the result. There is no need to parse
# the text because it couldn't be a part of any list.
# Then start a list with the word because we need to pass
# this word to the Tcl parser
append words $text
set word $wordWithQuote
}
if { [catch {llength $word} length] == 0 && $length == 1} {
# The word is a valid list so add it to the list.
lappend words [string trim $word]
set word {}
}
}
# If the last word has not been added to the list then there
# is a problem.
if { [string length $word] } {
error "incomplete word \"$word\""
}
return $words
}Who made this? It's quite neat... -FW

