Overview
AMG: This page presents a
C parser which finds and describes global variables and types. Not everything is implemented, for example function pointers and bit fields, but it does go quite far.
Yeti and
ylex are used to implement the grammar and scanner. The output is in
dict format and an
SQLite database, whichever works better for your application. Your local C compiler is used to obtain numeric data such as sizes, offsets, alignments, and enumeration values.
High-level organization
The code is divided into three scripts:
cscanner.tcl,
cparser.tcl, and
driver.tcl.
cscanner.tcl converts the source code into a sequence of tokens.
cparser.tcl converts the sequence of tokens into a dict describing the global variables and types.
driver.tcl orchestrates the process:
- "Compile" cscanner.tcl and cparser.tcl using yeti and ylex, caching the result to speed up subsequent invocations.
- Run the C preprocessor on the source code, after removing all #include directives, to strip away macros, conditionals, and comments.
- Run the scanner and parser to get the dict describing the global variables and types.
- Run the C compiler to augment the dict with sizes, offsets, and so forth.
- Convert the augmented dict to an SQLite database.
All generated files are placed in the tmp subdirectory.
driver.tcl assumes you have yeti in the lib subdirectory, though if it's in any standard Tcl library installation directories, it'll be found there too.
The reason
#include is stripped is to avoid system header files which typically use compiler extensions, e.g. built-in functions, that this code doesn't know how to parse. Your application may require a different approach.
cscanner.tcl
# cscanner.tcl
#
# Builds the C token scanner class (CScanner) from regular-expression rules
# using the ylex scanner generator.  driver.tcl dumps the generated class to a
# cache file, so this script only needs to run when the rules change.
package require ylex
# Create the object used to assemble the scanner.
yeti::ylex CScannerGenerator -name CScanner
# On error, print the filename, line number, and column number.  $file, $line,
# and $column are the scanner's own position-tracking variables (see below);
# $yyerrmsg and $verbout come from the generated scanner class.
CScannerGenerator code error {
    if {$file ne {}} {
        puts -nonewline $verbout $file:
    }
    puts $verbout "$line:$column: $yyerrmsg"
}
# Define public variables and methods.
CScannerGenerator code public {
    variable file {}      ;# Current file name, or empty string if none.
    variable line 1       ;# Current line number (1-based).
    variable column 1     ;# Current column number (1-based).
    variable typeNames {} ;# List of identifiers scanned as TYPE_NAME tokens.

    # addTypeName --
    # Adds a typedef name to the list of names treated as TYPE_NAME.  The
    # parser calls this when it registers a typedef; the tokenType method
    # consults the list to decide between TYPE_NAME and IDENTIFIER.
    method addTypeName {name} {
        lappend typeNames $name
    }
}
# Define internal methods.
CScannerGenerator code private {
    # result --
    # Common result handler for matches.  Updates the line and column counts
    # to account for the matched text, and (if provided) returns the
    # arguments, i.e. the token type and optional value, to the caller's
    # caller, so the generated scanner method yields the token directly.
    method result {args} {
        # Drop carriage returns so only \n marks line ends.
        set text [string map {\r {}} $yytext]
        set start 0
        while {$start < [string length $text]} {
            # Each iteration consumes a run of ordinary characters plus at
            # most one newline or tab.
            regexp -start $start {([^\n\t]*)([\n\t]?)} $text chunk body space
            incr column [string length $body]
            if {$space eq "\n"} {
                set column 1
                incr line
            } elseif {$space eq "\t"} {
                # Advance to the next 8-column tab stop (columns are
                # 1-based).  The mask must be ~7 to match the +7 rounding;
                # ~3 would mix 4- and 8-column tab stops.
                set column [expr {(($column + 7) & ~7) + 1}]
            }
            incr start [string length $chunk]
        }
        if {[llength $args]} {
            # Return from the generated scanner method, not just from here.
            return -level 2 $args
        }
    }

    # lineDirective --
    # Processes #line directives.  Accepts both "#line N" and the "# N"
    # shorthand emitted by some preprocessors; the captured number is stored
    # directly into the line variable by [regexp].  An optional quoted file
    # name follows; [subst] decodes its backslash escapes.
    method lineDirective {} {
        if {[regexp -expanded {
            ^[\ \t\v\f]*\#[\ \t\v\f]*(?:line[\ \t\v\f]+)?
            (\d+)(?:[\ \t\v\f]+"((?:[^\\"]|\\.)+)")?
        } $yytext _ line newFile] && $newFile ne ""} {
            set file [subst -nocommands -novariables $newFile]
        }
    }

    # tokenType --
    # Decides if a token is TYPE_NAME or IDENTIFIER according to $typeNames.
    method tokenType {} {
        if {$yytext in $typeNames} {
            return TYPE_NAME
        } else {
            return IDENTIFIER
        }
    }
}
# Define useful abbreviations for upcoming regular expressions.  The braced
# argument is a name/pattern list consumed by ylex, so comments cannot appear
# inside it; notes on the entries:
# - INT_SUFFIX allows an optional u/U and up to two l/L in either order.
# - C99_COMMENT honors backslash line continuations inside // comments.
# - HEXADECIMAL accepts both the 0x and 0X prefixes, as C does.
# - CHAR_LITERAL counts as an integer constant, matching C semantics.
# - \M is a word-boundary constraint so e.g. "0" is not taken out of "0x1A".
CScannerGenerator macro {
    EXP {[eE][+-]?\d+}
    INT_SUFFIX {[uU]?[lL]{0,2}|[lL]{0,2}[uU]?}
    WHITESPACE {[ \t\v\n\f]}
    C_COMMENT {/\*.*?\*/}
    C99_COMMENT {//(?:\\.|[^\n\\])*(?:\n|$)}
    IGNORE {<WHITESPACE>|<C_COMMENT>|<C99_COMMENT>}
    DECIMAL {\d+<INT_SUFFIX>\M}
    OCTAL {0[0-7]+<INT_SUFFIX>\M}
    HEXADECIMAL {0[xX][[:xdigit:]]+<INT_SUFFIX>\M}
    CHAR_LITERAL {L?'(?:[^\\']|\\.)+'}
    INTEGER {<DECIMAL>|<OCTAL>|<HEXADECIMAL>|<CHAR_LITERAL>}
    REAL {(?:\d+<EXP>|\d*\.\d+<EXP>?|\d+\.\d*<EXP>?)[fFlL]?\M}
    STRING_LITERAL {L?"(?:[^\\"]|\\.)+"}
    CONSTANT {<INTEGER>|<REAL>}
}
# Generate a regular expression matching any simple token.  The value of such
# tokens is the uppercase version of the token string itself.  Longer
# operators (e.g. >>=) are listed before their prefixes (>>, >) so the
# alternation prefers them.  The braced argument is a plain Tcl list, so no
# comments may appear inside it.
foreach token {
    auto bool break case char const continue default do double else enum extern
    float for goto if int long register return short signed sizeof static struct
    switch typedef union unsigned void volatile while ... >>= <<= += -= *= /= %=
    &= ^= |= >> << ++ -- -> && || <= >= == != ; \{ \} , : = ( ) [ ] . & ! ~ - +
    * / % < > ^ | ?
} {
    # Escape regular expression metacharacters, including < and > which ylex
    # would otherwise treat as macro delimiters.
    lappend pattern [regsub -all {[][*+?{}()<>|.^$\\]} $token {\\&}]
}
set pattern [join $pattern |]
# Match simple tokens.
CScannerGenerator add $pattern {result [string toupper $yytext]}
# Match and decode more complex tokens.  Every token-producing action goes
# through [result] so the line and column counters stay accurate; a bare
# [return] would skip the position bookkeeping.
CScannerGenerator add {
    {<IGNORE>} {result}
    {(?n)^[ \t\v\f]*#.*$\n} {lineDirective}
    {[a-zA-Z_]\w*\M} {result [tokenType] $yytext}
    {<CONSTANT>} {result CONSTANT}
    {<STRING_LITERAL>} {result STRING_LITERAL}
    {.} {error "invalid character \"$yytext\""}
}
# vim: set sts=4 sw=4 tw=80 et ft=tcl:
cparser.tcl
# cparser.tcl
package require yeti
# Create the object used to assemble the parser.  The start symbol is
# translation_unit, as in the C standard's grammar.
yeti::yeti CParserGenerator -name CParser -start translation_unit
# Define internal state and the helper methods used by the grammar actions.
CParserGenerator code private {
    variable database {} ;# Dict: category -> name -> definition dict.
    variable typedef 0   ;# 1 while inside a typedef declaration.

    # registerTypedef --
    # If the current declaration is a typedef, tells the scanner to treat
    # the declarator's name as a TYPE_NAME token from now on.
    method registerTypedef {declarator} {
        if {$typedef} {
            $scanner addTypeName [dict get $declarator name]
        }
    }

    # registerDecl --
    # Stores each named declaration into the database under the variable or
    # typedef category (selected by the typedef flag), then clears the flag.
    method registerDecl {decls} {
        foreach decl $decls {
            if {[dict exists $decl name]} {
                set category [lindex {variable typedef} $typedef]
                dict set decl category $category
                dict set database $category [dict get $decl name] $decl
            }
        }
        set typedef 0
    }

    # appendArray --
    # Appends an array indexing level to a declarator's index list.
    method appendArray {decl} {
        dict lappend decl index [dict create method array]
    }

    # mergeType --
    # Merges two type dicts, e.g. for multi-keyword types such as
    # "unsigned long".  Name lists are concatenated and sorted so equivalent
    # spellings (e.g. "long unsigned") produce the same merged name.
    method mergeType {lhs rhs} {
        if {[dict exists $lhs name] && [dict exists $rhs name]} {
            dict set rhs name [lsort [concat\
                    [dict get $lhs name] [dict get $rhs name]]]
        }
        dict merge $lhs $rhs
    }

    # mergeDeclType --
    # Attaches a declaration's type to each of its declarators.  When no
    # type was given, defaults to int.  An anonymous (unnamed) type is
    # embedded only in the first declarator; the rest reference it through a
    # reuse key so the definition is not duplicated.
    method mergeDeclType {type decls} {
        if {![dict size $type]} {
            set type [newCore int]
        }
        if {[dict exists $type name] || [llength $decls] < 2} {
            lmap decl $decls {dict replace $decl type $type}
        } else {
            set decls [lassign $decls first]
            dict set first type $type
            set name [dict get $first name]
            list $first {*}[lmap decl $decls {dict replace $decl reuse $name}]
        }
    }

    # newCore --
    # Returns a reference to a core type (int, char, ...), recording it in
    # the database the first time it is seen.  Note: [dict exists] takes a
    # dict VALUE, so the variable must be expanded with $.
    method newCore {name} {
        if {![dict exists $database core $name]} {
            dict set database core $name category core
            dict set database core $name name $name
        }
        dict create category core name $name
    }

    # newAgg --
    # Returns a struct or union type reference.  Named, complete types also
    # get their field list recorded in the database; anonymous types carry
    # their fields inline.
    method newAgg {category name fields} {
        if {$name eq {}} {
            dict create category $category fields $fields
        } else {
            if {[llength $fields]} {
                dict set database $category $name name $name
                dict set database $category $name category $category
                dict set database $category $name fields $fields
            }
            dict create category $category name $name
        }
    }

    # newEnum --
    # Like newAgg, but for enumerations and their enumerator lists.
    method newEnum {name enumerators} {
        if {$name eq {}} {
            dict create category enum enumerators $enumerators
        } else {
            if {[llength $enumerators]} {
                dict set database enum $name name $name
                dict set database enum $name category enum
                dict set database enum $name enumerators $enumerators
            }
            dict create category enum name $name
        }
    }
}
# On error, print the filename, line number, and column number, obtained from
# the attached scanner object's public position variables.
CParserGenerator code error {
    if {[set file [$scanner cget -file]] ne {}} {
        puts -nonewline $verbout $file:
    }
    puts $verbout "[$scanner cget -line]:[$scanner cget -column]: $yyerrmsg"
}
# Reset the accumulated database and typedef flag so the parser object can be
# reused for another parse.
CParserGenerator code reset {
    set database {}
    set typedef 0
}
# Empty default return value for grammar rules with no explicit action
# (written "-" in the grammar below).
CParserGenerator code returndefault {}
# The grammar, following the classic ANSI C (C89) Yacc grammar.  Notes:
# - The braced argument is parsed as a list by yeti, so explanatory comments
#   cannot be placed inside it.
# - "-" means no explicit action; such rules return the default (empty, per
#   the returndefault code above).
# - Expression and statement rules are recognized but build no data; only
#   declarations feed the database.  Function bodies, parameter lists, and
#   abstract declarators are parsed and discarded, which is why function
#   pointers are not described in the output.
# - In actions, $1, $2, ... are the rule's component values; commands such as
#   [dict lappend 2 ...] name those same variables ("2") for in-place update.
CParserGenerator add {
    optional_comma
        {} -
      | {,} -
    primary_expression
        {IDENTIFIER} -
      | {CONSTANT} -
      | {STRING_LITERAL} -
      | {( expression )} -
    postfix_expression
        {primary_expression} -
      | {postfix_expression [ expression ]} -
      | {postfix_expression ( )} -
      | {postfix_expression ( argument_expression_list )} -
      | {postfix_expression . IDENTIFIER} -
      | {postfix_expression -> IDENTIFIER} -
      | {postfix_expression ++} -
      | {postfix_expression --} -
    argument_expression_list
        {assignment_expression} -
      | {argument_expression_list , assignment_expression} -
    unary_expression
        {postfix_expression} -
      | {++ unary_expression} -
      | {-- unary_expression} -
      | {unary_operator cast_expression} -
      | {SIZEOF unary_expression} -
      | {SIZEOF ( type_name )} -
    unary_operator
        {&} -
      | {*} -
      | {+} -
      | {-} -
      | {!} -
      | {~} -
    cast_expression
        {unary_expression} -
      | {( type_name ) cast_expression} -
    multiplicative_expression
        {cast_expression} -
      | {multiplicative_expression * cast_expression} -
      | {multiplicative_expression / cast_expression} -
      | {multiplicative_expression % cast_expression} -
    additive_expression
        {multiplicative_expression} -
      | {additive_expression + multiplicative_expression} -
      | {additive_expression - multiplicative_expression} -
    shift_expression
        {additive_expression} -
      | {shift_expression << additive_expression} -
      | {shift_expression >> additive_expression} -
    relational_expression
        {shift_expression} -
      | {relational_expression < shift_expression} -
      | {relational_expression > shift_expression} -
      | {relational_expression <= shift_expression} -
      | {relational_expression >= shift_expression} -
    equality_expression
        {relational_expression} -
      | {equality_expression == relational_expression} -
      | {equality_expression != relational_expression} -
    and_expression
        {equality_expression} -
      | {and_expression & equality_expression} -
    exclusive_or_expression
        {and_expression} -
      | {exclusive_or_expression ^ and_expression} -
    inclusive_or_expression
        {exclusive_or_expression} -
      | {inclusive_or_expression | exclusive_or_expression} -
    logical_and_expression
        {inclusive_or_expression} -
      | {logical_and_expression && inclusive_or_expression} -
    logical_or_expression
        {logical_and_expression} -
      | {logical_or_expression || logical_and_expression} -
    conditional_expression
        {logical_or_expression} -
      | {logical_or_expression ? expression : conditional_expression} -
    assignment_expression
        {conditional_expression} -
      | {unary_expression assignment_operator assignment_expression} -
    assignment_operator
        {=} -
      | {*=} -
      | {/=} -
      | {%=} -
      | {+=} -
      | {-=} -
      | {<<=} -
      | {>>=} -
      | {&=} -
      | {^=} -
      | {|=} -
    expression
        {assignment_expression} -
      | {expression , assignment_expression} -
    constant_expression
        {conditional_expression} -
    declaration
        {declaration_specifiers ;} {return $1}
      | {declaration_specifiers init_declarator_list ;} {mergeDeclType $1 $2}
    declaration_specifiers
        {storage_class_specifier} -
      | {storage_class_specifier declaration_specifiers} {return $2}
      | {type_specifier} {return $1}
      | {type_specifier declaration_specifiers} {mergeType $1 $2}
      | {type_qualifier} -
      | {type_qualifier declaration_specifiers} {return $2}
    init_declarator_list
        {init_declarator} {list $1}
      | {init_declarator_list , init_declarator} {concat $1 [list $3]}
    init_declarator
        {declarator} {registerTypedef $1; return $1}
      | {declarator = initializer} {return $1}
    storage_class_specifier
        {TYPEDEF} {set typedef 1}
      | {EXTERN} -
      | {STATIC} -
      | {AUTO} -
      | {REGISTER} -
    type_specifier
        {VOID} {newCore void}
      | {CHAR} {newCore char}
      | {SHORT} {newCore short}
      | {INT} {newCore int}
      | {LONG} {newCore long}
      | {FLOAT} {newCore float}
      | {DOUBLE} {newCore double}
      | {SIGNED} {newCore signed}
      | {UNSIGNED} {newCore unsigned}
      | {struct_or_union_specifier} {return $1}
      | {enum_specifier} {return $1}
      | {TYPE_NAME} {dict create category typedef name $1}
    struct_or_union_specifier
        {struct_or_union IDENTIFIER \{ struct_declaration_list \}} {newAgg $1 $2 $4}
      | {struct_or_union \{ struct_declaration_list \}} {newAgg $1 {} $3}
      | {struct_or_union IDENTIFIER} {newAgg $1 $2 {}}
    struct_or_union
        {STRUCT} {return struct}
      | {UNION} {return union}
    struct_declaration_list
        {struct_declaration} {return $1}
      | {struct_declaration_list struct_declaration} {concat $1 $2}
    struct_declaration
        {specifier_qualifier_list struct_declarator_list ;} {mergeDeclType $1 $2}
    specifier_qualifier_list
        {type_specifier specifier_qualifier_list} {mergeType $1 $2}
      | {type_specifier} {return $1}
      | {type_qualifier specifier_qualifier_list} {return $2}
      | {type_qualifier} -
    struct_declarator_list
        {struct_declarator} {list $1}
      | {struct_declarator_list , struct_declarator} {concat $1 [list $3]}
    struct_declarator
        {declarator} {return $1}
      | {: constant_expression} -
      | {declarator : constant_expression} {return $1}
    enum_specifier
        {ENUM \{ enumerator_list optional_comma \}} {newEnum {} $3}
      | {ENUM IDENTIFIER \{ enumerator_list optional_comma \}} {newEnum $2 $4}
      | {ENUM IDENTIFIER} {newEnum $2 {}}
    enumerator_list
        {enumerator} {list [dict create name $1]}
      | {enumerator_list , enumerator} {concat $1 [list [dict create name $3]]}
    enumerator
        {IDENTIFIER} {return $1}
      | {IDENTIFIER = constant_expression} {return $1}
    type_qualifier
        {CONST} -
      | {VOLATILE} -
    declarator
        {pointer direct_declarator} {dict lappend 2 index {*}$1}
      | {direct_declarator} {return $1}
    direct_declarator
        {IDENTIFIER} {dict create name $1}
      | {( declarator )} {return $2}
      | {direct_declarator [ constant_expression ]} {appendArray $1}
      | {direct_declarator [ ]} {appendArray $1}
      | {direct_declarator ( parameter_type_list )} {return $1}
      | {direct_declarator ( identifier_list )} {return $1}
      | {direct_declarator ( )} {return $1}
    pointer
        {*} {list [dict create method pointer]}
      | {* type_qualifier_list} {list [dict create method pointer]}
      | {* pointer} {lappend 2 [dict create method pointer]}
      | {* type_qualifier_list pointer} {lappend 3 [dict create method pointer]}
    type_qualifier_list
        {type_qualifier} -
      | {type_qualifier_list type_qualifier} -
    parameter_type_list
        {parameter_list} -
      | {parameter_list , ...} -
    parameter_list
        {parameter_declaration} -
      | {parameter_list , parameter_declaration} -
    parameter_declaration
        {declaration_specifiers declarator} -
      | {declaration_specifiers abstract_declarator} -
      | {declaration_specifiers} -
    identifier_list
        {IDENTIFIER} -
      | {identifier_list , IDENTIFIER} -
    type_name
        {specifier_qualifier_list} -
      | {specifier_qualifier_list abstract_declarator} -
    abstract_declarator
        {pointer} -
      | {direct_abstract_declarator} -
      | {pointer direct_abstract_declarator} -
    direct_abstract_declarator
        {( abstract_declarator )} -
      | {[ ]} -
      | {[ constant_expression ]} -
      | {direct_abstract_declarator [ ]} -
      | {direct_abstract_declarator [ constant_expression ]} -
      | {( )} -
      | {( parameter_type_list )} -
      | {direct_abstract_declarator ( )} -
      | {direct_abstract_declarator ( parameter_type_list )} -
    initializer
        {assignment_expression} -
      | {\{ initializer_list optional_comma \}} -
    initializer_list
        {initializer} -
      | {initializer_list , initializer} -
    statement
        {labeled_statement} -
      | {compound_statement} -
      | {expression_statement} -
      | {selection_statement} -
      | {iteration_statement} -
      | {jump_statement} -
    labeled_statement
        {IDENTIFIER : statement} -
      | {CASE constant_expression : statement} -
      | {DEFAULT : statement} -
    compound_statement
        {\{ \}} -
      | {\{ statement_list \}} -
      | {\{ declaration_list \}} -
      | {\{ declaration_list statement_list \}} -
    declaration_list
        {declaration} -
      | {declaration_list declaration} -
    statement_list
        {statement} -
      | {statement_list statement} -
    expression_statement
        {;} -
      | {expression ;} -
    selection_statement
        {IF ( expression ) statement} -
      | {IF ( expression ) statement ELSE statement} -
      | {SWITCH ( expression ) statement} -
    iteration_statement
        {WHILE ( expression ) statement} -
      | {DO statement WHILE ( expression ) ;} -
      | {FOR ( expression_statement expression_statement ) statement} -
      | {FOR ( expression_statement expression_statement expression ) statement} -
    jump_statement
        {GOTO IDENTIFIER ;} -
      | {CONTINUE ;} -
      | {BREAK ;} -
      | {RETURN ;} -
      | {RETURN expression ;} -
    translation_unit
        {external_declaration} {return $database}
      | {translation_unit external_declaration} {return $database}
    external_declaration
        {;} -
      | {function_definition} -
      | {declaration} {registerDecl $1}
    function_definition
        {declaration_specifiers declarator declaration_list compound_statement} -
      | {declaration_specifiers declarator compound_statement} -
      | {declarator declaration_list compound_statement} -
      | {declarator compound_statement} -
}
driver.tcl
#!/usr/bin/env tclsh
# driver.tcl
lappend auto_path [file join [file dirname [info script]] lib]
package require sqlite3
# ---------------------------- UTILITY PROCEDURES ------------------------------
# dict_getnull --
# Implementation of [dict getnull].  Behaves like [dict get], except that a
# missing key yields an empty string instead of an error.
proc dict_getnull {dictionary args} {
    if {![dict exists $dictionary {*}$args]} {
        return {}
    }
    return [dict get $dictionary {*}$args]
}
# Graft getnull onto the [dict] ensemble as a new subcommand.
namespace ensemble configure dict -map [dict replace\
        [namespace ensemble configure dict -map] getnull dict_getnull]
# print --
# Generates code to invoke printf() in a way that works well for producing
# nested Tcl lists and dictionaries a few elements at a time.  Appends one
# printf() call to the caller's "code" variable; the caller's "sep" variable
# carries the separator state between calls: it is suppressed right after an
# opening brace and restored to a single space otherwise.
proc print {format args} {
    upvar 1 code code sep sep
    # Examine the format before anything is modified.
    set opening [expr {[string index $format end] eq "\{"}]
    set closing [expr {[string index $format 0] eq "\}"}]
    # No separator right before a closing brace or at the very start.
    if {$closing || ![info exists sep]} {
        set sep \"\"
    }
    set arglist [join [linsert $args 0 $sep] ", "]
    append code "\nprintf(\"%s$format\", $arglist);"
    # After an opening brace the next element must not be preceded by a
    # separator; otherwise separate elements with a space.
    if {$opening} {
        unset sep
    } else {
        set sep "\" \""
    }
}
# enquire --
# Generates code to ask the C compiler for variable and type definitions. This
# design approach avoids making assumptions about portability by getting the
# necessary data from the authority.
#
# Arguments:
# path - C lvalue expression addressing the entity being described.
# def  - Dict describing the entity, as produced by the parser.
#
# Returns C code whose printf() calls emit the def dict augmented with sizes,
# offsets, strides, counts, and enumerator values.  [print] appends to the
# local "code" variable via upvar; only "sep" is shared with the caller, so
# each recursion level accumulates and returns its own code.
proc enquire {path def} {
    # Use the caller's sep variable for formatting continuity.
    upvar 1 sep sep
    # Emit comment to help with debugging.
    append code "\n/* $path */"
    append code "\n/* $def */"
    # Emit basic keys.  The structured keys are handled individually below.
    dict for {key value} $def {
        if {$key ni {enumerators fields index type}} {
            print "%s %s" \"$key\" \"$value\"
        }
    }
    # Add the value key to each element of the enumerator list, if present.
    # The enumerator name itself serves as the C expression for its value.
    if {[dict exists $def enumerators]} {
        print "enumerators \{"
        foreach symbol [dict get $def enumerators] {
            print \{
            dict for {key value} $symbol {
                print "%s %s" \"$key\" \"$value\"
            }
            print "value %d" [dict get $symbol name]
            print \}
        }
        print \}
    }
    # Recursively process the type, dereferencing all indirection to drill down
    # to the underlying type.  One [0] is appended per indexing level so the
    # sub-path designates the element type.
    if {[dict exists $def type]} {
        print "type \{"
        set subpath $path[string repeat \[0\]\
                [llength [dict getnull $def index]]]
        append code [enquire $subpath [dict get $def type]]
        print \}
    }
    # Recursively process each field, adding the aggregate-relative offset.
    if {[dict exists $def fields]} {
        print "fields \{"
        foreach field [dict get $def fields] {
            print \{
            set subpath $path.[dict get $field name]
            append code [enquire $subpath $field]
            print "offset %zu" "(size_t)&$subpath - (size_t)&$path"
            print \}
        }
        print \}
    }
    # Emit pointer and array indexing data, adding array stride and count.
    # Pointer levels get no stride/count; the dict merely records the method.
    if {[dict exists $def index]} {
        print "index \{"
        set outside $path
        foreach level [dict get $def index] {
            print \{
            dict for {key value} $level {
                print "%s %s" \"$key\" \"$value\"
            }
            set inside $outside\[0\]
            if {[dict get $level method] eq "array"} {
                print "stride %zu" sizeof($inside)
                print "count %zu" "sizeof($outside) / sizeof($inside)"
            }
            set outside $inside
            print \}
        }
        print \}
    }
    # Emit the overall size of the type or variable.
    # NOTE(review): %zu requires a C99 printf -- confirm for the target
    # toolchain.
    print "size %zu" sizeof($path)
    # Return the generated code.
    return $code
}
# record --
# Inserts type data into the SQL database, and returns the type ID.
#
# Arguments:
# db  - The open sqlite3 database command.
# def - Augmented definition dict (with size/offset/etc. keys) to record.
proc record {db def} {
    # Attempt to get the type from the type table given its category and name.
    # Create it if it doesn't already exist. Due to the nature of SQL NULL, all
    # untagged types are considered distinct, which is desirable behavior.
    set category [dict get $def category]
    set name [dict getnull $def name]
    set size [dict get $def size]
    if {[set parent [$db onecolumn {
        SELECT type
        FROM types
        WHERE category = $category
        AND name = nullif($name, '')
    }]] eq {}} {
        $db eval {INSERT INTO types VALUES (NULL, $category,
                nullif($name, ''), $size)}
        set parent [$db last_insert_rowid]
    }
    # Record the variable or type data. Ignore core and incomplete types.
    if {[dict exists $def enumerators]} {
        # Record the enumerators list.
        set enumeratorIdx 0
        foreach enum [dict get $def enumerators] {
            set symbol [dict get $enum name]
            set value [dict get $enum value]
            $db eval {INSERT INTO enumerators VALUES ($parent, $enumeratorIdx,
                    $symbol, $value)}
            incr enumeratorIdx
        }
    } elseif {[dict exists $def fields] || [dict exists $def type]
           || [dict exists $def reuse]} {
        # Prepare to iterate through all fields in the aggregate. If this is
        # not an aggregate, make a single-element list containing the type data.
        if {[dict exists $def fields]} {
            set fields [dict get $def fields]
        } else {
            set fields [list [dict filter $def key type reuse index]]
        }
        # Record the fields list.  $types is a local table mapping field
        # names to already-recorded type IDs, used to resolve reuse keys.
        set fieldIdx 0
        set types {}
        foreach field $fields {
            # Determine the field's type identifier.
            if {[dict exists $field type]} {
                # The type is named or not being reused. Recursively invoke
                # this procedure to get its type ID, possibly creating it in the
                # process, possibly recursing further.
                set type [record $db [dict get $field type]]
            } elseif {[dict size $types]} {
                # The type is anonymous and being reused. Get the previous type
                # ID from the local reuse table, referenced by field name.
                set type [dict get $types [dict get $field reuse]]
            } else {
                # The type is anonymous and being reused. Get the type ID from
                # the output database, referenced by symbol name.
                set reuse [dict get $field reuse]
                set type [$db onecolumn {
                    SELECT fields.type
                    FROM types, fields ON types.type = fields.parent
                    WHERE category = $category
                    AND types.name = $reuse
                }]
            }
            # Insert the field record into the fields table.  nullif() maps
            # the empty strings produced by [dict getnull] to SQL NULL.
            set name [dict getnull $field name]
            set size [dict getnull $field size]
            set offset [dict getnull $field offset]
            dict set types $name $type
            $db eval {INSERT INTO fields VALUES ($parent, $fieldIdx, $type,
                    nullif($name, ''), nullif($size, ''), nullif($offset, ''))}
            # Insert the field's indirection data into the indexes table.
            # NOTE(review): stride/count are deliberately unset for pointer
            # levels -- presumably the sqlite3 Tcl binding binds unset
            # variables as NULL; confirm against its documentation.
            if {[dict exists $field index]} {
                set indexIdx 0
                foreach index [dict get $field index] {
                    if {[dict get $index method] eq "array"} {
                        set stride [dict get $index stride]
                        set count [dict get $index count]
                    } else {
                        unset -nocomplain stride count
                    }
                    $db eval {INSERT INTO indexes VALUES ($parent, $fieldIdx,
                            $indexIdx, $stride, $count)}
                    incr indexIdx
                }
            }
            incr fieldIdx
        }
    }
    # Return the type ID to the caller.
    return $parent
}
# --------------- PARSE INPUT FILE TO OBTAIN LEXICAL STRUCTURE -----------------
# Load scanner and parser scripts, building them if they're not current.
# Generating the classes with yeti/ylex is slow, so the dumped code is cached
# in tmp/ and rebuilt only when the source script is newer than the cache.
file mkdir tmp
foreach {file class} {cscanner CScannerGenerator cparser CParserGenerator} {
    if {![file exists tmp/$file.tcl]
     || [file mtime $file.tcl] > [file mtime tmp/$file.tcl]} {
        source $file.tcl
        set chan [open tmp/$file.tcl w]
        # The dumped code defines itcl classes, so load itcl first.
        chan puts $chan "package require itcl\n"
        chan puts $chan [$class dump]
        chan close $chan
        # The generator object is no longer needed once dumped.
        itcl::delete object $class
    }
    source tmp/$file.tcl
}
# Read input file, strip #include, and apply preprocessor.  Stripping
# #include keeps system headers (and their compiler extensions) out of the
# token stream; the injected #line directive makes diagnostics point at the
# original file.
# NOTE(review): assumes a "cpp" executable is on PATH -- confirm for the
# target platform.
set chan [open [lindex $argv 0]]
regsub -all -line {^[ \t\v\f]*#[ \t\v\f]*include.*} [chan read $chan] {} input
chan close $chan
set input [exec cpp << "#line 1 \"[lindex $argv 0]\"\n$input"]
# Initialize scanner and parser.
CScanner scanner
scanner start $input
CParser parser -scanner scanner
# Parse input to obtain global type and variable database.
set dataDict [parser parse]
# ----------- INVOKE COMPILER TO OBTAIN MEMORY LAYOUT INFORMATION --------------
# Generate program header, including input to be analyzed.
append code $input
# Redirect diagnostics past the copied input to the generated file itself.
append code "\n#line [expr {[regexp -all {\n} $input] + 3}] \"tmp/enquire.c\""
append code "\n#include <stddef.h>"
append code "\nint main(void)"
append code "\n\{"
append code "\nextern int printf(const char *, ...);"
# Generate program body to get all type and variable data.  Types have no
# storage, so they are addressed through a cast null pointer; these paths are
# only used inside sizeof() and address arithmetic, never dereferenced.
dict for {category definitions} $dataDict {
    print "%s \{" \"$category\"
    dict for {name definition} $definitions {
        switch $category {
            core - typedef {set path "(($name *)0)\[0\]"}
            struct - union - enum {set path "(($category $name *)0)\[0\]"}
            variable {set path $name}
        }
        print "%s \{" \"$name\"
        append code [enquire $path $definition]
        print \}
    }
    print \}
}
# Generate program footer.
append code "\nreturn 0;"
append code "\n\}"
# Write program to a temporary file.
set chan [open tmp/enquire.c w]
chan puts $chan $code
chan close $chan
# Compile and run the program to get the complete database.  The program's
# stdout is the original dict augmented with sizes, offsets, strides, counts,
# and enumerator values.
exec cc tmp/enquire.c -o tmp/enquire
set dataDict [exec tmp/enquire]
# --------- STORE STRUCTURE AND LAYOUT INFORMATION INTO SQL DATABASE -----------
# Build the SQL type database.  Delete leftovers from any previous run so the
# schema can be recreated from scratch.
file delete tmp/database.db tmp/database.db-journal
sqlite3 db tmp/database.db
db eval {PRAGMA foreign_keys = 1}
# Wrap schema creation and data transfer in one transaction for speed and
# atomicity.
db transaction {
    # Define the database schema.  NULL names model untagged types; the
    # paired CHECK constraints keep anonymous field rows fully NULL.
    db eval {
        CREATE TABLE types (
            type INTEGER PRIMARY KEY,
            category TEXT NOT NULL,
            name TEXT,
            size INTEGER NOT NULL,
            UNIQUE (category, name)
        );
        CREATE TABLE fields (
            parent INTEGER NOT NULL REFERENCES types,
            field INTEGER NOT NULL,
            type INTEGER NOT NULL REFERENCES types,
            name TEXT,
            size INTEGER,
            offset INTEGER,
            PRIMARY KEY (parent, field),
            UNIQUE (parent, name),
            CHECK ((name IS NULL) = (size IS NULL)),
            CHECK ((name IS NULL) = (offset IS NULL))
        );
        CREATE TABLE indexes (
            parent INTEGER NOT NULL REFERENCES types,
            field INTEGER NOT NULL,
            level INTEGER NOT NULL,
            stride INTEGER,
            count INTEGER,
            PRIMARY KEY (parent, field, level),
            FOREIGN KEY (parent, field) REFERENCES fields,
            CHECK ((stride IS NULL) = (count IS NULL))
        );
        CREATE TABLE enumerators (
            parent INTEGER NOT NULL REFERENCES types,
            sort INTEGER NOT NULL,
            symbol TEXT NOT NULL UNIQUE,
            value INTEGER NOT NULL,
            PRIMARY KEY (parent, sort)
        );
    }
    # Transfer all type data from the dict database to the SQL database.
    dict for {category definitions} $dataDict {
        dict for {name definition} $definitions {
            record db $definition
        }
    }
}
db close
# vim: set sts=4 sw=4 tw=80 et ft=tcl:
Put this in test.c:
typedef struct {int a; enum T {E1, E2} *b;} S, (*P)[3];
P x, *y[5];
Run
./driver.tcl test.c. Here's the contents of
$dataDict, as returned by the parser, formatted for display with the dict keys in bold:
core {
int {
category core
name int
}
} enum {
T {
category enum
name T
enumerators {{name E1} {name E2}}
}
} typedef {
S {
category typedef
name S
type {
category struct
fields {
{name a type {category core name int}}
{name b index {{method pointer}} type {category enum name T}}
}
}
} P {
category typedef
name P
index {{method pointer} {method array}}
reuse S
}
} variable {
x {
category variable
name x
type {category typedef name P}
} y {
category variable
name y
index {{method array} {method pointer}}
type {category typedef name P}
}
}
tmp/enquire.c is generated, compiled, and executed, and its output is the augmented
$dataDict shown below with the new keys and values in bold:
core {
int {
category core name int
size 4
}
} enum {
T {
category enum name T
enumerators {
{name E1 value 0}
{name E2 value 1}
}
size 4
}
} typedef {
S {
category typedef name S
type {
category struct
fields {
{name a type {category core name int size 4} size 4 offset 0}
{name b type {category enum name T size 4} index {{method pointer}} size 8 offset 8}
}
size 16
}
size 16
} P {
category typedef name P
reuse S
index {{method pointer} {method array stride 16 count 3}}
size 8
}
} variable {
x {
category variable name x
type {category typedef name P size 8}
size 8
} y {
category variable name y
type {category typedef name P size 8}
index {{method array stride 8 count 5} {method pointer}}
size 40
}
}
Last, this above data structure is stored into the SQLite database
tmp/database.db, contents as follows:
sqlite> select * from types;
type category name size
---------- ---------- ---------- ----------
1 core int 4
2 enum T 4
3 typedef S 16
4 struct 16
5 typedef P 8
6 variable x 8
7 variable y 40
sqlite> select * from fields;
parent field type name size offset
---------- ---------- ---------- ---------- ---------- ----------
4 0 1 a 4 0
4 1 2 b 8 8
3 0 4
5 0 4
6 0 5
7 0 5
sqlite> select * from indexes;
parent field level stride count
---------- ---------- ---------- ---------- ----------
4 1 0
5 0 0
5 0 1 16 3
7 0 0 8 5
7 0 1
sqlite> select * from enumerators;
parent sort symbol value
---------- ---------- ---------- ----------
2 0 E1 0
2 1 E2 1
In this schema,
parent is a foreign key into the
type column of the
types table, and
field is a foreign key into the
fields table.
Yes, I know this schema abuses the terms
type and
field when it comes to variables. A variable is expressed as a type with a single field, and that field is the actual type of the variable. If you can think of better names for the tables, please share.