Updated 2014-04-08 21:17:03 by AMG

XDR stands for External Data Representation, the marshalled form of Sun RPC communications. XDR deals with sizing and endianness issues. Similar to ASN, but not defined by the OSI.

XDR parsing in (nearly) pure tcl -- 20040609 CMcC

The following is some code to turn XDR definitions into tcl code to parse and generate XDR binary, sufficient to implement things like the mount.x files which define message flow in NFS.

Unfinished, rough.

xdr-tcl.l is a ylex/yeti program to lex the XDR definiton language parser
    #! /bin/sh
    # \
    exec itclsh "$0" ${1+"$@"}
    
    # Generate Lexical Analyzer for XDR - xdr-lex.tcl
    lappend auto_path /usr/local/lib
    package require yeti
    package require ylex
    
    set xdr_lex [yeti::ylex \#auto -name xdr_lexer]
    $xdr_lex macro \
        OCOMM {/[*]} \
        CCOMM {[*]/} \
        WS        {[ \t\f]} \
        D        {[0-9]} \
        LD        {[\.0-9]} \
        E        {[DEde][+-]?[0-9]+} \
        IDS        {[a-zA-Z]} \
        IDCH        {[a-zA-Z0-9_.$]} \
        INT        {[-]?[0-9]+} \
        LT        {[<]} \
        GT        {[>]} \
        OR        {[|]} \
        DOT        {[.]} \
        STAR        {[*]} \
        CARET        {\^} \
        LP        {[(]} \
        RP        {[)]} \
        STRING {"([^"]|"")*"} \
        EOL        {\n} \
        other        {.} \
        HASH        {#} \
        LBRACKET {\[} \
        RBRACKET {\]} \
        LBRACE {\{} \
        RBRACE {\}} \
        LP {[(]} \
        RP {[)]} \
        SEMI {[;]} \
        COLON {[:]} \
        EQUAL {[=]} \
        COMMA {[,]}
    
    $xdr_lex code public {
        variable lineno 0
    }
    
    $xdr_lex code reset {
        set lineno 0
    }
    
    $xdr_lex add -state INITIAL <OCOMM> {
            #puts stderr "COMMENT"
            set yystate COMMENT
    }
    $xdr_lex add -state COMMENT <CCOMM> {
            #puts stderr "END COMMENT"
            set yystate INITIAL
    }
    $xdr_lex add -state COMMENT . {
    }
    
    $xdr_lex add -state INITIAL {\n<WS>+} {}
    
    $xdr_lex add -state INITIAL \n {
        # ignore new lines - don't combine with white space
        incr lineno;
    }
    
    $xdr_lex add -state INITIAL <WS>+ {
        # ignore white space
    }
    
    $xdr_lex add -state INITIAL <INT> {
       return [list INT $yytext]
    }
    
    # add special character macros
    foreach ch {
        LBRACE RBRACE LT GT LBRACKET RBRACKET LP RP
        COMMA EQUAL SEMI COLON STAR INT
    } {
        $xdr_lex add -state INITIAL [list <${ch}>] [list return S_$ch]
    }
    
    set reserved {
        opaque string void
        unsigned int hyper float double quadruple bool
        enum struct union
        switch case default
        const typedef
    }
    
        #foreach char [split $word {}] {
            #append pattern \[[string toupper $char]$char\]
        #}
    
    # add reserved words
    foreach word $reserved {
        $xdr_lex add -state INITIAL -nocase ${word} \
            "return \[list S_[string toupper $word]]"
    }
    
    $xdr_lex add -state INITIAL <IDCH>+ {
        return [list S_ID [string trim $yytext]]
    }
    
    # generate the scanner code to stdout
    puts [$xdr_lex dump]
    delete object $xdr_lex

xdr-tcl.y is a yeti file to generate an XDR language parser.
    #! /bin/sh
    # \
    exec itclsh "$0" ${1+"$@"}
    
    if { [info script] == "$::argv0" } {
        lappend auto_path [pwd]
    }
    
    # Lexical analyzer for XDR.
    lappend auto_path /usr/local/lib
    package require yeti
    package require ylex
    
    set xdr_parser [eval yeti::yeti \#auto -name xdr_parser -start specification -verbose 4]
    
    $xdr_parser code public {
        method getstate {} {
            return [list $yystate $yylhs]
        }
        public variable yyterm ""
    }
    
    $xdr_parser code error {
        upvar yyterm yyterm
        #puts stderr "Error: $yyerrmsg / $yyterm"
    }
    
    $xdr_parser add {
    specification        {definition specification} {return [list $1 $2]}
    | definition {}
    
    definition        constdef        {}
    | typedef {return $1}
    
    constant        INT        {return $1}
    
    constdef        {S_CONST S_ID S_EQUAL constant S_SEMI} {
        return [CONST $2 $4]
    }
    
    assign {S_ID S_EQUAL value} {return [concat [list $1] [list $3]]}
    
    assignments assign {}
    | {assignments S_COMMA assign} {return [concat $1 $3]}
    
    enumbody {S_LBRACE assignments S_RBRACE} {
        return $2
    }
    
    declarations {declaration S_SEMI} {return [list $1]}
    |        {declaration S_SEMI declarations} {return [concat [list $1] $3]}
    
    structbody {S_LBRACE declarations S_RBRACE} {
        return $2
    }
    
    unionbody {S_SWITCH S_LP declaration S_RP S_LBRACE cases S_RBRACE} {
        return [concat [list $3] $6]
    }
    
    typedef        {S_TYPEDEF declaration S_SEMI} {return [SEM TYPEDEF $2]}
    |        {S_ENUM S_ID enumbody S_SEMI} {return [eval SEM ENUM $2 $3]}
    |        {S_STRUCT S_ID structbody S_SEMI} {return [eval SEM STRUCT $2 $3]}
    |        {S_UNION S_ID unionbody S_SEMI} {return [eval SEM UNION $2 $3]}
    
    typespec        S_BOOL {return [SCALAR BOOL]}
    |        S_FLOAT {return [SCALAR FLOAT]}
    |        S_DOUBLE {return [SCALAR DOUBLE ]}
    |        S_QUADRUPLE {return [SCALAR QUADRUPLE]}
    |        S_INT {return [SCALAR INT]}
    |        S_HYPER {return [SCALAR HYPER]}
    |        {S_UNSIGNED S_INT} {return [SCALAR INT UNSIGNED]}
    |        {S_UNSIGNED S_HYPER} {return [SCALAR HYPER UNSIGNED]}
    |        {S_ENUM enumbody} {return [SEM A_ENUM $2]}
    |        {S_STRUCT structbody} {return [SEM A_STRUCT $2]}
    |        {S_UNION unionbody} {return [SEM A_UNION $2]}
    |        S_ID {return $1}
    
    value        constant {return $1}
    |        S_ID {return [CONST_LOOKUP $1]}
    
    value_or_nil value        {return $1}
    |        {}        {return ""}
    
    declaration        S_VOID {
        return [SEM VOID]
    }
    |        {S_STRING S_ID S_LT value_or_nil S_GT} {
        return [eval SEM STRING $2 $4]
    }
    |        {S_OPAQUE S_ID S_LT value_or_nil S_GT} {
        return [eval SEM OPAQUE_VECTOR $2 $4]
    }
    |        {S_OPAQUE S_ID S_LBRACKET value S_RBRACKET} {
        return [SEM OPAQUE $2 $4]
    }
    |        {typespec S_STAR S_ID} {
        return [SEM OPTIONAL $1 $3]
    }
    |        {typespec S_ID S_LT value_or_nil S_GT} {
        return [eval SEM TYPE $1 $2 $4]
    }
    |        {typespec S_ID S_LBRACKET value S_RBRACKET} {
        return [SEM VECTOR $1 $2 $4]
    }
    |        {typespec S_ID} {
        return [SEM DECLARE $2 $1]
    }
    
    case_value        constant {return $1}
    |        S_ID {return [list CASE $1]}
    
    case        {S_CASE case_value S_COLON declaration S_SEMI} {
        return [concat [list $2] [list $4]]
    }
    
    defcase        {S_DEFAULT S_COLON declaration S_SEMI} {
        return [concat "" [list $3]]
    }
    
    cases        case {
        return $1
    }
    |        {case cases} {
        return [concat $1 $2]
    }
    |        {case defcase} {
        return [concat $1 $2]
    }
    
    }
    
    #
    # generate the parser code to stdout
    #
    puts [$xdr_parser dump]
    delete object $xdr_parser

xdr.tcl is a series of routines to pack/unpack XDR
    # routines for packing and unpacking fundamental types
    
    proc VOID_pack {s v} {
    }
    proc VOID_unpack {s v} {
    }
    
    proc STRING_pack {s v} {
        upvar $v var
        upvar $s string
    }
    proc STRING_unpack {s v} {
        upvar $v var
        upvar $s string
    }
    
    proc OPAQUE_VECTOR_pack {s v} {
        upvar $v var
        upvar $s string
    }
    proc OPAQUE_VECTOR_unpack {s v} {
        upvar $v var
        upvar $s string
    }
    
    proc OPAQUE_pack {s v len} {
        upvar $v var
        upvar $s string
    }
    proc OPAQUE_unpack {s v len} {
        upvar $v var
        upvar $s string
    }
    
    proc BOOL_pack {s v} {
        upvar $v var
        upvar $s string
        append string [binary format I $var]
    }
    proc BOOL_unpack {s v} {
        upvar $v var
        upvar $s string
        binary scan $string I var
        set string [string $string 4 end]
    }
    
    proc INT_pack {s v} {
        upvar $v var
        upvar $s string
        append string [binary format I $var]
    }
    proc INT_unpack {s v} {
        upvar $v var
        upvar $s string
        binary scan $string I var
        set string [string range $string 4 end]
    }
    
    proc INT_UNSIGNED_pack {s v} {
        upvar $v var
        upvar $s string
        append string [binary format I $var]
    }
    proc INT_UNSIGNED_unpack {s v} {
        upvar $v var
        upvar $s string
        binary scan $string I var
        set string [string range $string 4 end]
    }
    
    proc SHORT_pack {s v} {
        upvar $v var
        upvar $s string
        append string [binary format S $var]
    }
    proc SHORT_unpack {s v} {
        upvar $v var
        upvar $s string
        binary scan $string S var
        set string [string range $string 2 end]
    }
    
    proc CHAR_pack {s v} {
        upvar $v var
        upvar $s string
        append string [string index $var 0]
    }
    proc CHAR_unpack {s v} {
        upvar $v var
        upvar $s string
        set var [string index $string 0]
        set string [string range $string 1 end]
    }
    
    proc UCHAR_pack {s v} {
        upvar $v var
        upvar $s string
        append string [string index $var 0]
    }
    proc UCHAR_unpack {s v} {
        upvar $v var
        upvar $s string
        set var [string index $string 0]
        set string [string range $string 1 end]
    }
    
    proc FLOAT_pack {s v} {
        upvar $v var
        upvar $s string
    }
    proc FLOAT_unpack {s v} {
        upvar $v var
        upvar $s string
    }
    
    proc DOUBLE_pack {s v} {
        upvar $v var
        upvar $s string
        error "can't handle DOUBLE"
    }
    proc DOUBLE_unpack {s v} {
        upvar $v var
        upvar $s string
        set string [string range $string 4 end]
    }
    
    proc QUADRUPLE_pack {s v} {
        upvar $v var
        upvar $s string
        error "can't handle QUADRUPLE"
    }
    proc QUADRUPLE_unpack {s v} {
        upvar $v var
        upvar $s string
        set string [string range $string 8 end]
    }
    
    proc HYPER_pack {s v} {
        upvar $v var
        upvar $s string
        error "can't handle HYPER"
    }
    proc HYPER_unpack {s v} {
        upvar $v var
        upvar $s string
        set string [string range $string 8 end]
    }
    
    proc HYPER_UNSIGNED_pack {s v} {
        upvar $v var
        upvar $s string
        error "can't handle UNSIGNED HYPER"
    }
    proc HYPER_UNSIGNED_unpack {s v} {
        upvar $v var
        upvar $s string
        set string [string range $string 8 end]
    }