#!/usr/bin/env perl

use warnings;
use strict;

use Data::Dumper;
use Getopt::Long qw(:config no_ignore_case);
use File::Path qw(make_path);
use XML::Simple qw(:strict);
use JSON;

# Pull in from the lib directory
use FindBin qw($RealBin);
use FindBin qw($RealScript);
use lib "$RealBin/lib";

use BitRange;

#-------------------------------------------------------------------------------
# Global Variables
#-------------------------------------------------------------------------------

# Supported file versions and their values.
my $FILE_VERSION =
{
    VER_01 => 0x01,
};

# This is a map of all currently supported models/ECs and their IDs. The keys
# are the names accepted in the model_ec attribute of the XML (see
# __expandModelEc()).
my $SUPPORTED_MODEL_EC =
{
    EXPLORER_11 => { id => 0x60D20011, type => "ocmb", desc => "Explorer 1.1" },
    EXPLORER_20 => { id => 0x60D20020, type => "ocmb", desc => "Explorer 2.0" },
    P10_10      => { id => 0x20DA0010, type => "proc", desc => "P10 1.0" },
    P10_20      => { id => 0x20DA0020, type => "proc", desc => "P10 2.0" },
};

# All models/ECs that may still exist in the XML, but no longer need to be
# built. This is useful for build optimization and also helps prevent build
# breaks when the XML still exists, but is not needed anymore.
my $DEPRECATED_MODEL_EC = [];

# Supported register types and their values.
#   id        - value written to the binary output for this register type.
#   addr_size - number of bytes used when writing a register address.
#   reg_size  - presumably the register width in bytes (TODO confirm; the only
#               use visible here just reads it).
my $REGISTER_TYPE =
{
    SCOM   => { id => 0x01, addr_size => 4, reg_size => 8 },
    IDSCOM => { id => 0x02, addr_size => 8, reg_size => 8 },
};

# Supported attention types and their values. Each entry is a
# [ numeric value, description ] pair.
my $ATTN_TYPE =
{
    CS  => [ 1, 'checkstop'         ], # System checkstop hardware attention
    UCS => [ 2, 'unit checkstop'    ], # Unit checkstop hardware attention
    RE  => [ 3, 'recoverable'       ], # Recoverable hardware attention
    SPA => [ 4, 'special attention' ], # event requiring action by the SP FW
    HA  => [ 5, 'host attention'    ], # event requiring action by the host FW
};

#-------------------------------------------------------------------------------
# Help function
#-------------------------------------------------------------------------------

# Prints the usage text to stdout and terminates the script.
sub help()
{
    # Build the complete usage text first, then emit it in one go.
    my $usage = <<EOF;
Usage: $RealScript -h
       $RealScript -i <input_dir> -o <output_dir>

Generates specified data files from the input Chip Data XML.

General options:

 -h, --help     Prints this menu.
 -i, --input    Directory containing the Chip Data XML files.
 -o, --output   Directory that will contain the data files.

Data file options (must specify at least one):

 --cdb          Generates Chip Data Binary files.
 --json         Generates PEL Parser Data JSON files.

EOF

    print $usage;

    exit;
}

#-------------------------------------------------------------------------------
# Input
#-------------------------------------------------------------------------------

# With no arguments at all there is nothing to do but show the usage.
if ( 0 == scalar @ARGV ) { help(); }

# Parse the command line. An unrecognized or malformed option shows the usage.
my ( $help, $src_dir, $dest_dir, $gen_cdb, $gen_json );
if ( not GetOptions( 'h|help'     => \$help,
                     'i|input=s'  => \$src_dir,
                     'o|output=s' => \$dest_dir,
                     'cdb'        => \$gen_cdb,
                     'json'       => \$gen_json ) )
{
    help();
}

# Anything left over after option parsing is unexpected.
if ( 0 < scalar @ARGV ) { help(); }

# -h,--help
if ( $help ) { help(); }

# -i,--input: required, and must name an existing directory.
unless ( defined $src_dir ) { die "ERROR> Option -i required."; }
unless ( -d $src_dir )      { die "ERROR> '$src_dir' is not a directory"; }

# -o,--output: required. The directory is created if it does not exist yet.
unless ( defined $dest_dir ) { die "ERROR> Option -o required."; }
make_path( $dest_dir, {error => \my $err} );
for my $failure ( @{$err} )
{
    # Each entry is a single-pair hash of { file => message }. Only the first
    # failure is reported because die() ends the script.
    my ( $file, $message ) = %{$failure};
    die "ERROR> $message: $file\n";
}

# --cdb, --json: at least one output format must be requested.
if ( not $gen_cdb and not $gen_json )
{
    die "ERROR> Must specify at least one data file option.";
}

#-------------------------------------------------------------------------------
# Prototypes
#-------------------------------------------------------------------------------

# Forward declarations. These subs are called in Main below, ahead of where
# they are defined, so their prototypes must be known to the parser first.
sub importXML($);
sub normalizeXML($);
sub buildDataFiles($$);

#-------------------------------------------------------------------------------
# Main
#-------------------------------------------------------------------------------

# Validate and import the XML found in the input directory (-i).
my $chip_data_xml = importXML( $src_dir );

# There are some fields in the XML that are shorthand and need to be expanded
# before building the data files.
my $normalized_data = normalizeXML( $chip_data_xml );

# The XML should now be in a format to start building the data files in the
# output directory (-o).
buildDataFiles( $dest_dir, $normalized_data );

#-------------------------------------------------------------------------------
# Helper functions
#-------------------------------------------------------------------------------

# Terminates the script with the given message, prefixed with "ERROR> ".
sub FAIL($)
{
    my ( $message ) = @_;

    die( "ERROR> " . $message );
}

#-------------------------------------------------------------------------------
# Import functions
#-------------------------------------------------------------------------------

# Runs 'xmllint --noout --schema <schema> <path>' and returns a list of
# ( combined stdout/stderr text, wait status ).
#
# The command is executed with an argument list via fork/exec rather than
# through the shell, so paths containing spaces or shell metacharacters are
# passed to xmllint verbatim and cannot be interpreted as shell syntax.
sub __runXmllint($$)
{
    my ( $schema, $path ) = @_;

    # Forking open: the child's STDOUT is connected to $pipe in the parent.
    my $pid = open( my $pipe, '-|' );
    die "Couldn't fork for xmllint: $!" unless ( defined $pid );

    if ( 0 == $pid )
    {
        # Child process. Merge stderr into stdout (the shell's 2>&1) so the
        # parent captures the validation messages.
        open( STDERR, '>&', \*STDOUT ) or exit 126;

        exec( 'xmllint', '--noout', '--schema', $schema, $path ) or do
        {
            print STDERR "Couldn't exec xmllint: $!\n";
            exit 127;
        };
    }

    # Parent process. Slurp everything the child printed.
    my $out = do { local $/; <$pipe> };
    $out = '' unless ( defined $out );

    # Closing the pipe waits for the child and sets $? to its exit status.
    close $pipe;

    return ( $out, $? );
}

# For each supported XML file in the given directory:
#   - Ensures the XML is well-formed.
#   - Ensures the XML validates against the schema.
#   - Imports the XML into Perl data structures.
#
# Supported files are named '<type>_*.xml', where <type> is 'chip' or 'node'.
# Returns a hash reference mapping each type to a list of imported XML
# structures. Each structure also carries a 'path' key with the file it came
# from. Dies if the directory cannot be read or if any file fails validation.
sub importXML($)
{
    my ( $dir ) = @_;

    my $data = {};

    # Get a list of all the XML files. The list is sorted so files are
    # processed in the same order regardless of the file system's ordering.
    opendir( my $dh, $dir ) or die "Couldn't open dir '$dir': $!";
    my @files = sort grep { /^.+\.xml$/ } readdir $dh;
    closedir $dh;

    # Iterate each supported file type.
    for my $type ( "chip", "node" )
    {
        for my $file ( grep { /^$type\_.+\.xml$/ } @files )
        {
            my $path = "$dir/$file";

            # Ensure the XML is well-formed and validates against the schema.
            my $schema = "$RealBin/schema/$type.xsd";
            my ( $out, $status ) = __runXmllint( $schema, $path );
            die "$out\nRAS XML validation failed on $file" if ( 0 != $status );

            # Import the XML.
            my $xml = XMLin( $path, KeyAttr => {}, ForceArray => 1 );

            # Add the file path to the XML for error output.
            $xml->{path} = $path;

            # Push each file's data to a list for each file type.
            push @{$data->{$type}}, $xml;
        }
    }

    return $data;
}

#-------------------------------------------------------------------------------
# Normalize functions
#-------------------------------------------------------------------------------

# Splits a ',' separated string of models/ECs and returns the resulting list
# with every deprecated model/EC removed. Fails if any remaining entry is not a
# supported model/EC. See $SUPPORTED_MODEL_EC and $DEPRECATED_MODEL_EC.
sub __expandModelEc($)
{
    my ( $str ) = @_;

    # Deprecated models/ECs are silently dropped from the result.
    my %is_deprecated = map { $_ => 1 } @{$DEPRECATED_MODEL_EC};
    my @model_ecs = grep { not $is_deprecated{$_} } split(/,/, $str);

    # Everything that is left must be a known model/EC.
    for my $model_ec ( @model_ecs )
    {
        next if ( defined $SUPPORTED_MODEL_EC->{$model_ec} );

        FAIL("Unsupported model/EC: $model_ec");
    }

    return @model_ecs;
}

#-------------------------------------------------------------------------------

# Collects the reg_inst value of every given instance and returns whatever
# BitRange::compress() produces for the numerically sorted list.
sub __getInstRange($)
{
    my ( $insts ) = @_;

    my @reg_insts = ();
    for my $inst ( @{$insts} )
    {
        push @reg_insts, $inst->{reg_inst};
    }

    # The instances are not guaranteed to be in order, so sort them first.
    my @ordered = sort { $a <=> $b } @reg_insts;

    return BitRange::compress( \@ordered );
}

# Builds a register entry named $name with one instance per entry in $inst_in.
# The address of each instance is the input instance's address plus $addr_mod,
# formatted as an 8-digit hex string. Only SCOM registers are supported.
sub __getReg($$$$)
{
    my ( $inst_in, $reg_type, $name, $addr_mod ) = @_;

    my @instances = ();

    for my $inst ( @{$inst_in} )
    {
        if ( "IDSCOM" eq $reg_type )
        {
            # TODO: Need a portable way of handling 64-bit numbers.
            FAIL("IDSCOM address currently not supported");
        }

        if ( "SCOM" ne $reg_type )
        {
            FAIL("Unsupported register type for node: $name");
        }

        my $addr = sprintf( "0x%08x", hex($inst->{addr}) + $addr_mod );

        push @instances, { reg_inst => $inst->{reg_inst}, addr => $addr };
    }

    return { name => $name, instance => \@instances };
}

# Returns a 'reg' expression for the given register name. When $config is the
# string '0', the register expression is wrapped in a 'not' expression.
sub __getExpr($$)
{
    my ( $name, $config ) = @_;

    my $reg_expr = { type => 'reg', value1 => $name };

    return ( '0' eq $config ) ? { type => 'not', expr => [ $reg_expr ] }
                              : $reg_expr;
}

# Builds the rule for one attention type of a local FIR. The rule is the AND of
# the FIR itself, the inverted mask, and one term per action register. $config
# holds one '0'/'1' character per action register (ACT0, ACT1 and, optionally,
# ACT2); a '0' inverts the corresponding action register.
sub __getAct($$$$)
{
    my ( $fir, $range, $type, $config ) = @_;

    if ( $config !~ /^[01]{2,3}$/ )
    {
        FAIL("Invalid action config: $config");
    }

    my @act_cfg = split( //, $config );

    my @terms =
    (
        __getExpr( "${fir}",      '1' ),
        __getExpr( "${fir}_MASK", '0' ),
    );

    # The Nth character of the config describes the ACT<N> register.
    for my $idx ( 0 .. $#act_cfg )
    {
        push @terms, __getExpr( "${fir}_ACT${idx}", $act_cfg[$idx] );
    }

    my $and_expr = { type => 'and', expr => \@terms };

    return { node_inst => $range, attn_type => $type, expr => [ $and_expr ] };
}

#-------------------------------------------------------------------------------

# Expands each shorthand <local_fir> element of the node into the equivalent
# register and rule entries, then removes the local_fir data from the node.
sub __normalizeLocalFir($)
{
    my ( $node ) = @_;

    return unless ( defined $node->{local_fir} );

    # Note that the isolator will implicitly add all registers referenced by
    # the rules to the capture group. To reduce redundancy and overall file
    # size, these registers are not added to the capture group here.

    $node->{register} = [] unless ( defined $node->{register} );
    $node->{rule}     = [] unless ( defined $node->{rule}     );

    # Registers belonging to a local FIR:
    #   [ name suffix, address offset, pattern the FIR config must match ]
    # An undefined pattern means the register always exists.
    my @layout =
    (
        [ "",      0, undef ],
        [ "_MASK", 3, undef ],
        [ "_ACT0", 6, undef ],
        [ "_ACT1", 7, undef ],
        [ "_WOF",  8, qr/W/ ],
        [ "_ACT2", 9, qr/2/ ],
    );

    for my $fir ( @{$node->{local_fir}} )
    {
        my $name     = $fir->{name};
        my $insts    = $fir->{instance};
        my $reg_type = $node->{reg_type};

        my $inst_range = __getInstRange($insts);

        # Build all of the registers for this FIR before adding them to the
        # node.
        my @fir_regs = ();
        for my $entry ( @layout )
        {
            my ( $suffix, $offset, $needs ) = @{$entry};

            next if ( defined $needs and $fir->{config} !~ $needs );

            push @fir_regs,
                 __getReg( $insts, $reg_type, $name . $suffix, $offset );
        }

        push @{$node->{register}}, @fir_regs;

        # One rule per <action> element.
        for my $action ( @{$fir->{action}} )
        {
            push @{$node->{rule}},
                 __getAct( $name, $inst_range, $action->{attn_type},
                           $action->{config} );
        }
    }

    delete $node->{local_fir};
}

#-------------------------------------------------------------------------------

# Returns true if both data structures serialize to the same string. Hash keys
# are sorted during serialization so that key order does not matter.
#
# This is not very efficient, especially for large data structures. It is
# recommended to use Data::Compare, but that is not available on the pool
# machines.
sub __dirtyCompare($$)
{
    my ( $lhs, $rhs ) = @_;

    # Compact, deterministic Data::Dumper output for this comparison only.
    local $Data::Dumper::Sortkeys = 1;
    local $Data::Dumper::Indent   = 0;
    local $Data::Dumper::Terse    = 1;

    return Dumper($lhs) eq Dumper($rhs);
}

#-------------------------------------------------------------------------------

# Moves all of the node's registers into the master register list ($regs),
# which is keyed by model/EC and then by register name. A register may be
# defined by more than one node as long as every definition is identical.
sub __normalizeRegister($$)
{
    my ( $node, $regs ) = @_;

    return unless ( defined $node->{register} );

    for my $reg ( @{$node->{register}} )
    {
        # Registers are read/write unless stated otherwise.
        $reg->{access} = 'RW' unless ( defined $reg->{access} );

        # Each register keeps track of its own type.
        $reg->{reg_type} = $node->{reg_type};

        my $name = $reg->{name};

        for my $model_ec ( __expandModelEc($node->{model_ec}) )
        {
            my $known = $regs->{$model_ec}->{$name};

            unless ( defined $known )
            {
                # First definition: add it to the master register list.
                $regs->{$model_ec}->{$name} = $reg;
                next;
            }

            # The register already exists, so the contents must match.
            FAIL("Duplicate register: $name")
                unless ( __dirtyCompare($reg, $known) );
        }
    }

    # Clean up this node's register data.
    delete $node->{register};
}

#-------------------------------------------------------------------------------

# Splits the node's capture groups into one capture register list per node
# instance (stored in $insts_data), then removes the capture group data from
# the node.
sub __normalizeCaptureGroup($$)
{
    my ( $node, $insts_data ) = @_;

    # Capture groups are optional (although recommended).
    return unless ( defined $node->{capture_group} );

    for my $group ( @{$node->{capture_group}} )
    {
        my $cap_regs = $group->{capture_register};

        # A capture group without any capture registers is meaningless.
        if ( not defined $cap_regs or 0 == scalar @{$cap_regs} )
        {
            FAIL("<capture_group> for node $node->{name} does not contain at " .
                 "least one <capture_register>" );
        }

        my @node_insts = BitRange::expand($group->{node_inst});

        for my $cap_reg ( @{$cap_regs} )
        {
            # The register instances pair up one-to-one with the node
            # instances, so both lists must have the same size.
            my @reg_insts = BitRange::expand($cap_reg->{reg_inst});
            if ( scalar @node_insts != scalar @reg_insts )
            {
                FAIL("capture_group/\@node_inst and capture_register/" .
                     "\@reg_inst list sized not equal for node $node->{name}");
            }

            # Record the capture register against its node instance.
            for my $idx ( 0 .. $#node_insts )
            {
                my $node_inst = $node_insts[$idx];

                push @{$insts_data->{$node_inst}->{capture_group}},
                     { reg_name => $cap_reg->{reg_name},
                       reg_inst => $reg_insts[$idx] };
            }
        }
    }

    # Clean up this node's capture group data.
    delete $node->{capture_group};
}

#-------------------------------------------------------------------------------

sub __normalizeExpr($$$$); # Declared up front because the sub calls itself.

# Validates an expression from the XML and returns a normalized copy of it for
# a single node instance.
#   $in   - the expression to normalize.
#   $ni   - the node instance the expression is being expanded for.
#   $idx  - index of $ni within the rule's list of node instances.
#   $size - number of node instances in the rule.
sub __normalizeExpr($$$$)
{
    my ( $in, $ni, $idx, $size ) = @_;

    my $type     = $in->{type};
    my $sub_expr = $in->{expr};
    my $value1   = $in->{value1};
    my $value2   = $in->{value2};

    my $out = { type => $type };

    if ( "reg" eq $type )
    {
        # A register needs a name and has no sub-expressions.
        if ( not defined $value1 or defined $sub_expr )
        {
            FAIL("Invalid parameters for reg expression");
        }

        $out->{value1} = $value1;

        # value2 is optional in the XML. By default, the register instance is
        # the same as the node instance.
        $out->{value2} = $ni;

        if ( defined $value2 )
        {
            # Otherwise, value2 lists one register instance per node instance.
            my @reg_insts = BitRange::expand($value2);
            if ( $size != scalar @reg_insts )
            {
                FAIL("reg expression value2:$value2 list not the same ".
                     "size as containing node's rule instances:$size");
            }

            $out->{value2} = $reg_insts[$idx];
        }
    }
    elsif ( "int" eq $type )
    {
        # An integer only has a value.
        if ( not defined $value1 or defined $value2 or defined $sub_expr )
        {
            FAIL("Invalid parameters for int expression");
        }

        $out->{value1} = $value1;
    }
    elsif ( "and" eq $type or "or" eq $type )
    {
        # No values and at least one sub-expression.
        if ( defined $value1 or defined $value2 or
             not defined $sub_expr or scalar @{$sub_expr} < 1 )
        {
            FAIL("Invalid parameters for and/or expression");
        }

        $out->{expr} = [ map { __normalizeExpr($_, $ni, $idx, $size) }
                             @{$sub_expr} ];
    }
    elsif ( "not" eq $type )
    {
        # No values and exactly one sub-expression.
        if ( defined $value1 or defined $value2 or
             not defined $sub_expr or 1 != scalar @{$sub_expr} )
        {
            FAIL("Invalid parameters for not expression");
        }

        $out->{expr} = [ map { __normalizeExpr($_, $ni, $idx, $size) }
                             @{$sub_expr} ];
    }
    elsif ( "lshift" eq $type or "rshift" eq $type )
    {
        # Only value1 and exactly one sub-expression.
        if ( not defined $value1 or defined $value2 or
             not defined $sub_expr or 1 != scalar @{$sub_expr} )
        {
            FAIL("Invalid parameters for lshift/rshift expression");
        }

        $out->{value1} = $value1;

        $out->{expr} = [ map { __normalizeExpr($_, $ni, $idx, $size) }
                             @{$sub_expr} ];
    }
    else
    {
        FAIL("Unsupported expression type: $type");
    }

    return $out;
}

#-------------------------------------------------------------------------------

# Splits the node's rules into one rule list per node instance (stored in
# $insts_data), then removes the rule data from the node.
sub __normalizeRule($$)
{
    my ( $node, $insts_data ) = @_;

    # Tracks the attention type / node instance pairs seen so far. There
    # should be only one rule for each pair within this node.
    my %seen = ();

    for my $rule ( @{$node->{rule}} )
    {
        my $attn_type = $rule->{attn_type};

        # A rule has exactly one parent expression.
        if ( 1 != scalar @{$rule->{expr}} )
        {
            FAIL("Multiple parent expressions for rule: $node->{name} " .
                 "$attn_type");
        }
        my $parent = $rule->{expr}->[0];

        my @node_insts = BitRange::expand($rule->{node_inst});
        my $num_insts  = scalar @node_insts;

        # Expand the expression for each node instance.
        my $idx = 0;
        for my $ni ( @node_insts )
        {
            if ( defined $seen{$attn_type}->{$ni} )
            {
                FAIL("Duplicate rule: $node->{name} $attn_type $ni");
            }
            $seen{$attn_type}->{$ni} = 1;

            my $expr = __normalizeExpr($parent, $ni, $idx, $num_insts);

            push @{$insts_data->{$ni}->{rule}},
                 { attn_type => $attn_type, expr => $expr };

            $idx++;
        }
    }

    # Clean up this node's rule data.
    delete $node->{rule};
}

#-------------------------------------------------------------------------------

# Collects the signature of every bit of the node (stored in $sigs, keyed by
# model/EC, node name, node instance, and bit position) and splits the child
# node information per node instance (stored in $insts_data). The bit data is
# removed from the node afterwards.
#
# The node instances are taken from the keys of $insts_data. The Nth entry of
# a bit's node_inst list is the child node instance for the Nth node instance
# (in ascending numeric order).
sub __normalizeBit($$$)
{
    my ( $node, $sigs, $insts_data ) = @_;

    my @node_insts = sort {$a <=> $b} keys %{$insts_data};
    my $sz_insts = scalar @node_insts;

    # There should be only one child node per node instance bit position.
    # Maps node instance -> bit position -> child node entry.
    my $child_dups = {};

    for my $bit ( sort {$a->{pos} cmp $b->{pos}} @{$node->{bit}} )
    {
        my @child_insts = ();

        # Ensure child_node and node_inst are set properly.
        if ( defined $bit->{child_node} )
        {
            # Ensure each bit has a default node_inst attribute if needed.
            $bit->{node_inst} = "0" unless ( defined $bit->{node_inst} );

            # Get all of the instances for this child node.
            @child_insts = BitRange::expand($bit->{node_inst});

            # Both inst list must be equal in size.
            unless ( $sz_insts == scalar @child_insts )
            {
                FAIL("node_inst attribute list size for node:$node->{name} " .
                     "bit:$bit->{pos} does not match node instances " .
                     "represented by the <rule> element");
            }
        }
        elsif ( defined $bit->{node_inst} )
        {
            FAIL("node_inst attribute exists for node:$node->{name} " .
                 "bit:$bit->{pos} with no child_node attribute");
        }

        # This is to cover a bug in the figtree information where there
        # currently is no comment for some bits.
        $bit->{content} = "" unless ( defined $bit->{content} );

        # Get the signatures for each node, instance, and bit position.
        for my $p ( BitRange::expand($bit->{pos}) )
        {
            for my $i ( 0 .. ($sz_insts-1) )
            {
                my ( $n, $ni ) = ( $node->{name}, $node_insts[$i] );

                for my $model_ec ( __expandModelEc($node->{model_ec}) )
                {
                    # Check if this signature already exists.
                    if ( defined $sigs->{$model_ec}->{$n}->{$ni}->{$p} and
                         $bit->{content} ne
                            $sigs->{$model_ec}->{$n}->{$ni}->{$p} )
                    {
                        FAIL("Duplicate signature for $n $ni $p");
                    }

                    $sigs->{$model_ec}->{$n}->{$ni}->{$p} = $bit->{content};
                }

                # Move onto the next instance unless a child node exists.
                next unless ( defined $bit->{child_node} );

                my $pi = $child_insts[$i];

                my $child = { pos        => $p,
                              child_node => $bit->{child_node},
                              node_inst  => $pi };

                # Ensure a different child node doesn't already exist for this
                # node instance and bit position.
                my $prev = $child_dups->{$ni}->{$p};
                if ( defined $prev and not __dirtyCompare($child, $prev) )
                {
                    FAIL("Duplicate child_node for $n $ni $p");
                }

                # Remember this child so that later bits are checked against
                # it. Without this, the check above could never trigger.
                $child_dups->{$ni}->{$p} = $child;

                # Add this child node. Note that an identical repeat of an
                # existing child is still appended, as it always has been.
                push @{$insts_data->{$ni}->{bit}}, $child;
            }
        }
    }

    # Clean up this node's bit data.
    delete $node->{bit};
}

#-------------------------------------------------------------------------------

# Normalizes a single node in place:
#   - Expands shorthand <local_fir> elements.
#   - Moves the node's registers to the master register list ($regs).
#   - Collects the bit signatures ($sigs).
#   - Splits capture group, rule, and bit data per node instance and stores
#     the result in $node->{instance}.
# Fails if the node's register type is not supported or if only one of rules
# and bits is defined.
sub __normalizeNode($$$)
{
    my ( $node, $regs, $sigs ) = @_;

    # Ensure a valid register type. This is an exact lookup so the value from
    # the XML is never interpreted as a regular expression.
    my $reg_type = $node->{reg_type};
    unless ( defined $reg_type and exists $REGISTER_TYPE->{$reg_type} )
    {
        FAIL( "Unsupported register type: " .
              ( defined $reg_type ? $reg_type : "(undefined)" ) );
    }

    my $insts_data = {}; # Collect data for each instance of this node.

    # First, expand the <local_fir> data if it exists.
    __normalizeLocalFir($node);

    # All registers will be put in a master register list for the chip.
    __normalizeRegister($node, $regs);

    # Split the capture group information per node instance.
    __normalizeCaptureGroup($node, $insts_data);

    my $is_rule = (defined $node->{rule} and 0 < scalar @{$node->{rule}}) ? 1 : 0;
    my $is_bit  = (defined $node->{bit}  and 0 < scalar @{$node->{bit}})  ? 1 : 0;

    # If a rule is defined, a bit must be defined as well. It is possible for
    # neither to be defined (FFDC-only node).
    if ( $is_rule and $is_bit )
    {
        # Split the rule information per node instance. The sorted instance list
        # will be used as indexes for the node_inst attribute of the <bit>
        # elements.
        __normalizeRule($node, $insts_data);

        # Finally, collect the signature details and split the bit information
        # per node instance.
        __normalizeBit($node, $sigs, $insts_data);
    }
    elsif ( $is_rule or $is_bit )
    {
        # One is defined and the other is not. This is an error.
        FAIL("Node $node->{name} has a bit or rule defined and the other is not.");
    }

    # Now that we have all of the node data, collapse the instance data into
    # a list. Note that sort order doesn't matter. Only used for consistency.
    for ( sort keys %{$insts_data} )
    {
        $insts_data->{$_}->{node_inst} = $_;
        push @{$node->{instance}}, $insts_data->{$_};
    }
}

#-------------------------------------------------------------------------------

# Converts the imported XML (see importXML()) into the data used to build the
# output files. Returns a hash reference keyed by model/EC. Each entry
# contains:
#   attn_tree - taken from the chip file that lists the model/EC.
#   node      - list of normalized nodes, sorted by name.
#   register  - list of all registers referenced by the nodes, sorted by name.
#   signature - list of { name, inst, bit, desc } entries, sorted by node
#               name, node instance, and bit position.
# Fails if a model/EC is defined by more than one chip entry or a node is
# defined more than once for a model/EC.
sub normalizeXML($)
{
    my ( $xml ) = @_;

    my $data = {};

    # Tracks which chip file defined each model/EC (for duplicate checking).
    my %chip_path = ();

    # Iterate each chip file.
    for my $chip ( @{$xml->{chip}} )
    {
        # Iterate each model/EC.
        for my $model_ec ( __expandModelEc($chip->{model_ec}) )
        {
            # Ensure there is not a duplicate definition for a model/EC.
            if ( defined $chip_path{$model_ec} )
            {
                FAIL("Duplicate data for model/EC $model_ec in:\n" .
                     "    $chip_path{$model_ec}\n" .
                     "    $chip->{path}");
            }
            $chip_path{$model_ec} = $chip->{path};

            # Add this chip to the data.
            $data->{$model_ec}->{attn_tree} = $chip->{attn_tree};
        }
    }

    # Extract the data for each node.
    my ( $regs, $sigs, $node_dups ) = ( {}, {}, {} );
    for my $node ( sort { $a->{name} cmp $b->{name} } @{$xml->{node}} )
    {
        # A node may be defined for more than one model/EC.
        for my $model_ec ( __expandModelEc($node->{model_ec}) )
        {
            # A node can only be defined once per model/EC.
            if ( defined $node_dups->{$model_ec}->{$node->{name}} )
            {
                FAIL( "Duplicate node defined for $model_ec -> $node->{name} ");
            }
            else
            {
                $node_dups->{$model_ec}->{$node->{name}} = 1;
            }

            # Initialize the master list of registers and signatures of this
            # model/EC, if necessary.

            $regs->{$model_ec} = {} unless ( defined $regs->{$model_ec} );
            $sigs->{$model_ec} = {} unless ( defined $sigs->{$model_ec} );
        }

        # The same node content will be used for each model/EC characterized by
        # this node. There is some normalization that needs to happen because of
        # shorthand elements, like <local_fir>, and some error checking. This
        # only needs to be done once per node, not per model/EC.
        __normalizeNode( $node, $regs, $sigs );

        # Push the node data for each model/EC.
        for my $model_ec ( __expandModelEc($node->{model_ec}) )
        {
            push @{$data->{$model_ec}->{node}}, $node;
        }
    }

    # Sort and collapse the master register list.
    for my $m ( keys %{$regs} )
    {
        for my $n ( sort keys %{$regs->{$m}} )
        {
            push @{$data->{$m}->{register}}, $regs->{$m}->{$n};
        }
    }

    # Collapse the signature lists.
    for my $m ( keys %{$sigs} )
    {
        for my $n ( sort keys %{$sigs->{$m}} )
        {
            for my $i ( sort {$a <=> $b} keys %{$sigs->{$m}->{$n}} )
            {
                # The loop variable is deliberately not named $b, which would
                # hide the $b used by sort.
                for my $bit ( sort {$a <=> $b} keys %{$sigs->{$m}->{$n}->{$i}} )
                {
                    push @{$data->{$m}->{signature}},
                         { name => $n, inst => $i, bit => $bit,
                           desc => $sigs->{$m}->{$n}->{$i}->{$bit} };
                }
            }
        }
    }

    return $data;
}

#-------------------------------------------------------------------------------
# Output functions
#-------------------------------------------------------------------------------

# Writes $num to the file handle $fh as a big-endian binary value that is
# exactly $bytes bytes wide.
#
# The $num passed into this function can be a numeric or a string. All values
# are converted to a hex string and then into the binary format. This helps
# avoid portability issues with endianness. Requirements:
#  - Hex strings must start with '0x' (or '0X') and contain exactly two hex
#    digits per byte.
#  - For portability, 64-bit numbers must be passed as a hex string. Decimal
#    values are only accepted for sizes up to 4 bytes.
# Dies if $bytes is not positive or if $num does not fit the requirements.
sub __bin($$$)
{
    my ( $fh, $bytes, $num ) = @_;

    # $bytes must be a positive integer.
    die "Invalid bytes: $bytes" unless ( 0 < $bytes );

    my $str = ''; # Default invalid string

    my $char = $bytes * 2; # Number of characters in the string.

    # Check if $num is a hex string. Note that the character class must not
    # contain '|', otherwise a "0|..." prefix would be accepted as well.
    if ( $num =~ /^0[xX](.*)/ )
    {
        $str = $1; # strip the '0x'
    }
    # Check if $num is string or numeric decimal integer (32-bit max).
    elsif ( $num =~ /^[0-9]+$/ and $bytes <= 4 )
    {
        $str = sprintf("%0${char}x", $num); # Convert to hex string
    }

    # Check for a hex number with the valid size. This also rejects decimal
    # values that are too large for the requested size.
    unless ( $str =~ /^[0-9a-fA-F]{$char}$/ )
    {
        die "Invalid number: $num (size: $bytes)";
    }

    # Print the binary string.
    print $fh pack( "H$char", $str );
}

#-------------------------------------------------------------------------------

# Returns a hash of the given string that is $bytes bytes wide (1 to 4).
#
# The string is padded with zeros to a multiple of $bytes and then split into
# chunks of $bytes characters, each treated as one big-endian value. The hash
# is a simple "n*s[0] + (n-1)*s[1] + ... + s[n-1]" algorithm, where s[i] is the
# value of the i-th chunk, truncated to the target width.
sub __hash($$)
{
    my ( $bytes, $text ) = @_;

    my @octets = unpack("C*", $text); # one value per character

    # Currently only supporting 1, 2, 3, and 4 byte hashes.
    if ( $bytes < 1 or 4 < $bytes )
    {
        FAIL("Unsupported hash size: $bytes");
    }

    # Pad the end so the number of characters is divisible by $bytes.
    my $remainder = scalar(@octets) % $bytes;
    push @octets, (0) x ($bytes - $remainder) if ( 0 != $remainder );

    my ( $sumA, $sumB ) = ( 0, 0 );
    for ( my $pos = 0; $pos < scalar @octets; $pos += $bytes )
    {
        # Combine the characters of this chunk into a single value.
        my $val = 0;
        $val = ($val << 8) | $_ for ( @octets[ $pos .. ($pos + $bytes - 1) ] );

        # Running sum of the running sum gives the weighted total.
        $sumA += $val;
        $sumB += $sumA;
    }

    # Keep only the target number of bytes.
    return $sumB & ( 0xffffffff >> (8 * (4 - $bytes)) );
}

#-------------------------------------------------------------------------------

# Writes the register list to the binary file:
#   - The characters "REGS" followed by the number of registers (3 bytes).
#   - For each register: the 3-byte hash of its name, the register type,
#     the access flags, and the number of instances (1 byte each).
#   - For each register instance: the instance number (1 byte) and the
#     address (size depends on the register type).
# Fails if there are no registers, a register has no instances, or two
# register names hash to the same ID.
sub __printRegisters($$)
{
    my ( $fh, $data ) = @_;

    my $num_regs = scalar @{$data};
    if ( $num_regs < 1 ) { FAIL("No registers defined"); }

    # Register list metadata
    for my $byte ( unpack("C*", "REGS") ) { __bin($fh, 1, $byte); }
    __bin($fh, 3, $num_regs);

    my %name_for_id = (); # for hash duplicate checking

    for my $reg ( @{$data} )
    {
        # The register ID is the hash of the name, which must be unique.
        my $id = __hash(3, $reg->{name});
        if ( defined $name_for_id{$id} )
        {
            FAIL("Duplicate register ID hash " . sprintf('0x%08x', $id) .
                 " for $reg->{name} and $name_for_id{$id}");
        }
        $name_for_id{$id} = $reg->{name};

        # Attribute flags: 0x80 for readable, 0x40 for writable.
        my $flags = ( ( $reg->{access} =~ /R/ ) ? 0x80 : 0x00 )
                  | ( ( $reg->{access} =~ /W/ ) ? 0x40 : 0x00 );

        # There must be at least one address instance.
        my $num_inst = scalar @{$reg->{instance}};
        if ( $num_inst < 1 )
        {
            FAIL("No register instances defined for $reg->{name}");
        }

        # Register metadata
        __bin($fh, 3, $id);
        __bin($fh, 1, $REGISTER_TYPE->{$reg->{reg_type}}->{id});
        __bin($fh, 1, $flags);
        __bin($fh, 1, $num_inst);

        # Register Instance metadata
        my $addr_size = $REGISTER_TYPE->{$reg->{reg_type}}->{addr_size};
        for my $inst ( @{$reg->{instance}} )
        {
            __bin($fh, 1,          $inst->{reg_inst});
            __bin($fh, $addr_size, $inst->{addr}    );
        }
    }
}

#-------------------------------------------------------------------------------

sub __printExpr($$$); # Declared up front because the sub calls itself.

# Writes a normalized expression, including all of its sub-expressions, to the
# binary file.
#   $fh   - output file handle.
#   $size - number of bytes used to write the value of an "int" expression.
#   $expr - the expression (see __normalizeExpr()).
# Each expression starts with a 1-byte expression type, followed by the data
# specific to that type. Fails on an unsupported expression type instead of
# silently writing nothing, which would leave the output malformed.
sub __printExpr($$$)
{
    my ( $fh, $size, $expr ) = @_;

    my ( $t, $e, $v1, $v2 ) = ( $expr->{type}, $expr->{expr},
                                $expr->{value1}, $expr->{value2} );

    if ( "reg" eq $t )
    {
        __bin($fh, 1, 0x01);            # expression type for "reg"
        __bin($fh, 3, __hash(3,$v1));   # register id
        __bin($fh, 1, $v2);             # register instance
    }
    elsif ( "int" eq $t )
    {
        __bin($fh,     1, 0x02);    # expression type for "int"
        __bin($fh, $size, $v1);     # integer value
    }
    elsif ( "and" eq $t )
    {
        __bin($fh, 1, 0x10);                        # expression type for "and"
        __bin($fh, 1, scalar @{$e});                # number of sub-expressions
        __printExpr($fh, $size, $_) for ( @{$e} );  # add each sub-expression
    }
    elsif ( "or" eq $t )
    {
        __bin($fh, 1, 0x11);                        # expression type for "or"
        __bin($fh, 1, scalar @{$e});                # number of sub-expressions
        __printExpr($fh, $size, $_) for ( @{$e} );  # add each sub-expression
    }
    elsif ( "not" eq $t )
    {
        __bin($fh, 1, 0x12);                # expression type for "not"
        __printExpr($fh, $size, $e->[0]);   # add only sub-expression
    }
    elsif ( "lshift" eq $t )
    {
        __bin($fh, 1, 0x13);                # expression type for "lshift"
        __bin($fh, 1, $v1);                 # shift amount
        __printExpr($fh, $size, $e->[0]);   # add only sub-expression
    }
    elsif ( "rshift" eq $t )
    {
        __bin($fh, 1, 0x14);                # expression type for "rshift"
        __bin($fh, 1, $v1);                 # shift amount
        __printExpr($fh, $size, $e->[0]);   # add only sub-expression
    }
    else
    {
        # A parent expression may already have written its sub-expression
        # count, so skipping this expression would produce a malformed file.
        FAIL("Unsupported expression type: " .
             ( defined $t ? $t : "(undefined)" ));
    }
}

#-------------------------------------------------------------------------------

# Writes the isolation node list section of a Chip Data Binary file.
#
# Parameters:
#   $fh   - output file handle; every field is emitted through __bin()
#   $data - array ref of node hashes. Each node uses the keys name, reg_type
#           and instance; each instance uses node_inst and may carry the
#           optional lists capture_group, rule and bit.
#
# Calls FAIL() when the node list is empty, when a node has no instances, or
# when two node entries produce the same 2-byte ID hash.
sub __printNodes($$)
{
    my ( $fh, $data ) = @_;

    my $num_nodes = scalar @{$data};
    FAIL("No nodes defined") unless ( 0 < $num_nodes );

    # Isolation Node list metadata
    __bin($fh, 1, $_) for ( unpack("C*", "NODE") );
    __bin($fh, 2, $num_nodes);

    my $node_ids = {}; # node ID hash => node name, for duplicate checking

    for my $n ( @{$data} )
    {
        # Get the hash of the node name and check for duplicates. The hash is
        # two bytes wide, so it is reported with four hex digits.
        my $id = __hash(2, $n->{name});
        if ( defined $node_ids->{$id} )
        {
            FAIL("Duplicate node ID hash " . sprintf('0x%04x', $id) .
                 " for $n->{name} and $node_ids->{$id}");
        }
        else
        {
            $node_ids->{$id} = $n->{name};
        }

        my $num_insts = scalar @{$n->{instance}};
        unless ( 0 < $num_insts )
        {
            FAIL("No nodes instances defined for $n->{name}");
        }

        my $reg_type = $REGISTER_TYPE->{$n->{reg_type}}->{id};
        my $reg_size = $REGISTER_TYPE->{$n->{reg_type}}->{reg_size};

        # Register metadata
        __bin($fh, 2, $id);
        __bin($fh, 1, $reg_type);
        __bin($fh, 1, $num_insts);

        for my $i ( @{$n->{instance}} )
        {
            # Capture groups, rules, and child nodes are all optional. Resolve
            # each list once, substituting an empty list when it is absent, so
            # the counts and the loops below work from the same data and the
            # instance hash is never autovivified.
            my $cap_regs = (defined $i->{capture_group})
                                ? $i->{capture_group} : [];
            my $rules    = (defined $i->{rule}) ? $i->{rule} : [];
            my $bits     = (defined $i->{bit})  ? $i->{bit}  : [];

            # Register instance metadata
            __bin($fh, 1, $i->{node_inst}    );
            __bin($fh, 1, scalar @{$cap_regs});
            __bin($fh, 1, scalar @{$rules}   );
            __bin($fh, 1, scalar @{$bits}    );

            for my $cg ( @{$cap_regs} )
            {
                # Register capture register metadata
                __bin($fh, 3, __hash(3, $cg->{reg_name}));
                __bin($fh, 1, $cg->{reg_inst}           );
            }

            for my $r ( @{$rules} )
            {
                # Register rule metadata
                __bin($fh, 1, $ATTN_TYPE->{$r->{attn_type}}->[0]);
                __printExpr($fh, $reg_size, $r->{expr});
            }

            # Named $bit rather than $b: $a and $b are reserved for sort().
            for my $bit ( @{$bits} )
            {
                # Register child node metadata
                __bin($fh, 1, $bit->{pos}                  );
                __bin($fh, 2, __hash(2, $bit->{child_node}));
                __bin($fh, 1, $bit->{node_inst}            );
            }
        }
    }
}

#-------------------------------------------------------------------------------

# Writes the root node list section of a Chip Data Binary file.
#
# Parameters:
#   $fh   - output file handle; every field is emitted through __bin()
#   $data - array ref of root node hashes using the keys attn_type, root_node
#           and node_inst
#
# Calls FAIL() when the list is empty.
sub __printAttnTree($$)
{
    my ( $fh, $data ) = @_;

    my $root_count = scalar @{$data};
    if ( $root_count < 1 )
    {
        FAIL("No root nodes defined");
    }

    # Section header: the four characters of the tag, one byte each, followed
    # by a one-byte entry count.
    foreach my $tag_byte ( unpack("C*", "ROOT") )
    {
        __bin($fh, 1, $tag_byte);
    }
    __bin($fh, 1, $root_count);

    # One entry per root node: attention type value, 2-byte hash of the node
    # name, and the node instance.
    foreach my $root ( @{$data} )
    {
        my $attn_value = $ATTN_TYPE->{$root->{attn_type}}->[0];
        __bin($fh, 1, $attn_value);

        my $node_hash = __hash(2, $root->{root_node});
        __bin($fh, 2, $node_hash);

        __bin($fh, 1, $root->{node_inst});
    }
}

#-------------------------------------------------------------------------------

# Writes the eBMC PEL parser data for one chip model/EC as JSON.
#
# Parameters:
#   $fh       - output file handle the JSON text is printed to
#   $model_ec - key into $SUPPORTED_MODEL_EC
#   $sig_list - array ref of signature hashes using the keys name, bit, desc
#   $reg_list - array ref of register hashes using the keys name and instance;
#               each instance entry supplies reg_inst and addr
#
# The emitted object has the keys model_ec, attn_types, registers and
# signatures. Calls FAIL() for an unsupported model/EC, for a name hash
# collision, and for conflicting descriptions of the same signature bit.
sub __printParserData($$$$)
{
    my ( $fh, $model_ec, $sig_list, $reg_list) = @_;

    # IMPORTANT: All hash keys with hex values must be lowercase.

    my $attns = {};
    my $regs  = {};
    my $sigs  = {};

    # Get the chip info. Work on a shallow copy: formatting the ID in place
    # would replace the numeric ID in the global $SUPPORTED_MODEL_EC table with
    # a hex string and corrupt every later use of that ID.
    my $chip_info = $SUPPORTED_MODEL_EC->{$model_ec};
    unless ( defined $chip_info )
    {
        FAIL("Unsupported model/EC: $model_ec");
    }
    my $info = { %{$chip_info} };
    $info->{'id'} = sprintf('%08x', $info->{'id'});

    # Get the list of attention types, keyed by their numeric value.
    for my $attn ( values %{$ATTN_TYPE} )
    {
        $attns->{$attn->[0]} = $attn->[1];
    }

    # Get the signature data.
    for my $s ( @{$sig_list} )
    {
        # Format is:
        #   { id : [ name, { bit : desc, ... } ], ... }

        # The ID is a 2-byte hash of the node name (lowercase).
        my $id = sprintf('%04x', __hash(2, $s->{name}));

        if ( !exists($sigs->{$id}) )
        {
            # Initialize this node.
            $sigs->{$id} = [ $s->{name}, {} ];
        }
        elsif ( $sigs->{$id}->[0] ne $s->{name} )
        {
            # Two different node names produced the same hash.
            FAIL("Node hash collision for $id: $sigs->{$id}->[0] " .
                 "and $s->{name}");
        }

        my $bits = $sigs->{$id}->[1];

        if ( !exists($bits->{$s->{bit}}) )
        {
            # Set the signature for this bit.
            $bits->{$s->{bit}} = $s->{desc};
        }
        elsif ( $bits->{$s->{bit}} ne $s->{desc} )
        {
            # The same bit was given two different descriptions. Repeating an
            # identical description is tolerated.
            FAIL("Multiple signatures for $s->{name} bit $s->{bit}:\n" .
                 "  $bits->{$s->{bit}}\n" .
                 "  $s->{desc}");
        }
    }

    # Get the register data.
    for my $r ( @{$reg_list} )
    {
        # Format is:
        #   { id : [ name, { inst : addr, ... } ], ... }

        # The ID is a 3-byte hash of the register name (lowercase).
        my $id = sprintf('%06x', __hash(3, $r->{name}));

        if ( !exists($regs->{$id}) )
        {
            # Initialize this register.
            $regs->{$id} = [ $r->{name}, {} ];
        }
        elsif ( $regs->{$id}->[0] ne $r->{name} )
        {
            # Two different register names produced the same hash.
            FAIL("Register hash collision for $id: " .
                 "$regs->{$id}->[0] and $r->{name}");
        }

        # Add the address for each instance of the register (shouldn't have to
        # worry about duplicates).
        for my $inst ( @{$r->{instance}} )
        {
            $regs->{$id}->[1]->{$inst->{reg_inst}} = $inst->{addr};
        }
    }

    my $data =
    {
        'model_ec'   => $info,
        'attn_types' => $attns,
        'registers'  => $regs,
        'signatures' => $sigs,
    };

    print $fh to_json( $data, {utf8 => 1, pretty => 1, canonical => 1} );
}

#-------------------------------------------------------------------------------

# Generates the output files for every chip model/EC in $data.
#
# Parameters:
#   $dir  - directory the output files are written to
#   $data - hash ref keyed by model/EC name; each value is a chip hash that
#           must define register, node, attn_tree and signature
#
# When $gen_cdb is set, writes "$dir/chip_data_<model_ec>.cdb". When $gen_json
# is set, writes "$dir/pel_parser_data_<model_ec>.json". The model/EC name is
# lowercased in both file names.
#
# Calls FAIL() when a chip is missing a required section and dies when an
# output file cannot be opened or closed.
sub buildDataFiles($$)
{
    my ( $dir, $data ) = @_;

    # Required chip sections, paired with the wording used in the error
    # message when the section is missing.
    my @required =
    (
        [ register  => "registers"             ],
        [ node      => "nodes"                 ],
        [ attn_tree => "attn_tree information" ],
        [ signature => "signatures"            ],
    );

    # Sorted so files are generated in the same order on every run.
    for my $model_ec ( sort keys %{$data} )
    {
        my $chip = $data->{$model_ec};

        for my $section ( @required )
        {
            my ( $key, $what ) = @{$section};
            unless ( defined $chip->{$key} )
            {
                FAIL("Chip $model_ec does not contain $what");
            }
        }

        # Chip Data Binary files ###############################################

        if ( $gen_cdb )
        {
            my $bin_file = "$dir/chip_data_" . lc($model_ec) . ".cdb";
            open my $bin_fh, '>', $bin_file or die "Cannot open $bin_file: $!";
            binmode $bin_fh; # writes a binary file

            # Chip Data File metadata
            __bin($bin_fh, 1, $_) for ( unpack("C*", "CHIPDATA") );
            __bin($bin_fh, 4, $SUPPORTED_MODEL_EC->{$model_ec}->{id});
            __bin($bin_fh, 1, $FILE_VERSION->{VER_01}               );

            __printRegisters( $bin_fh, $chip->{register}  );
            __printNodes(     $bin_fh, $chip->{node}      );
            __printAttnTree(  $bin_fh, $chip->{attn_tree} );

            # Buffered write errors only surface at close, so check it rather
            # than leave a truncated file behind unnoticed.
            close $bin_fh or die "Cannot close $bin_file: $!";
        }

        # eBMC PEL parsing JSON ################################################

        if ( $gen_json )
        {
            my $file = "$dir/pel_parser_data_" . lc($model_ec) . ".json";
            open my $fh, '>', $file or die "Cannot open $file: $!";

            __printParserData( $fh, $model_ec, $chip->{signature},
                               $chip->{register} );

            # See above: a failed close means the JSON was not fully written.
            close $fh or die "Cannot close $file: $!";
        }
    }
}

