Blame - poky/meta/recipes-devtools/apt/apt/triehash - openbmc/openbmc

blob: b08bc6ec30006be3cfccd759e73df33743e4a09f [file] [log] [blame]

Andrew Geissler	4ed12e1	2020-06-05 18:00:41 -0500	[diff] [blame^]	1	#!/usr/bin/perl -w
				2	#
				3	# Copyright (C) 2016 Julian Andres Klode <jak@jak-linux.org>
				4	#
				5	# Permission is hereby granted, free of charge, to any person obtaining a copy
				6	# of this software and associated documentation files (the "Software"), to deal
				7	# in the Software without restriction, including without limitation the rights
				8	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				9	# copies of the Software, and to permit persons to whom the Software is
				10	# furnished to do so, subject to the following conditions:
				11	#
				12	# The above copyright notice and this permission notice shall be included in
				13	# all copies or substantial portions of the Software.
				14	#
				15	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				18	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				20	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				21	# THE SOFTWARE.
				22
				23	=encoding utf8
				24
				25	=head1 NAME
				26
				27	triehash - Generate a perfect hash function derived from a trie.
				28
				29	=cut
				30
				31	use strict;
				32	use warnings;
				33	use utf8;
				34	use Getopt::Long;
				35
				36	=head1 SYNOPSIS
				37
				38	B<triehash> [S<I<option>>] [S<I<input file>>]
				39
				40	=head1 DESCRIPTION
				41
				42	triehash takes a list of words in input file and generates a function and
				43	an enumeration to describe the word
				44
				45	=head1 INPUT FILE FORMAT
				46
				47	The file consists of multiple lines of the form:
				48
				49	[label ~ ] word [= value]
				50
				51	This maps word to value, and generates an enumeration with entries of the form:
				52
				53	label = value
				54
				55	If I<label> is undefined, the word will be used, the minus character will be
				56	replaced by an underscore. If value is undefined it is counted upwards from
				57	the last value.
				58
				59	There may also be one line of the format
				60
				61	[ label ~] = value
				62
				63	Which defines the value to be used for non-existing keys. Note that this also
				64	changes default value for other keys, as for normal entries. So if you place
				65
				66	= 0
				67
				68	at the beginning of the file, unknown strings map to 0, and the other strings
				69	map to values starting with 1. If label is not specified, the default is
				70	I<Unknown>.
				71
				72	=head1 OPTIONS
				73
				74	=over 4
				75
				76	=item B<-C>I<.c file> B<--code>=I<.c file>
				77
				78	Generate code in the given file.
				79
				80	=item B<-H>I<header file> B<--header>=I<header file>
				81
				82	Generate a header in the given file, containing a declaration of the hash
				83	function and an enumeration.
				84
				85	=item B<--enum-name=>I<word>
				86
				87	The name of the enumeration.
				88
				89	=item B<--function-name=>I<word>
				90
				91	The name of the function.
				92
				93	=item B<--label-prefix=>I<word>
				94
				95	The prefix to use for labels.
				96
				97	=item B<--label-uppercase>
				98
				99	Uppercase label names when normalizing them.
				100
				101	=item B<--namespace=>I<name>
				102
				103	Put the function and enum into a namespace (C++)
				104
				105	=item B<--class=>I<name>
				106
				107	Put the function and enum into a class (C++)
				108
				109	=item B<--enum-class>
				110
				111	Generate an enum class instead of an enum (C++)
				112
				113	=item B<--counter-name=>I<name>
				114
				115	Use I<name> for a counter that is set to the latest entry in the enumeration
				116	+ 1. This can be useful for defining array sizes.
				117
				118	=item B<--ignore-case>
				119
				120	Ignore case for words.
				121
				122	=item B<--multi-byte>=I<value>
				123
				124	Generate code reading multiple bytes at once. The value is a string of power
				125	of twos to enable. The default value is 320 meaning that 8, 4, and single byte
				126	reads are enabled. Specify 0 to disable multi-byte completely, or add 2 if you
				127	also want to allow 2-byte reads. 2-byte reads are disabled by default because
				128	they negatively affect performance on older Intel architectures.
				129
				130	This generates code for both multiple bytes and single byte reads, but only
				131	enables the multiple byte reads of GNU C compatible compilers, as the following
				132	extensions are used:
				133
				134	=over 8
				135
				136	=item Byte-aligned integers
				137
				138	We must be able to generate integers that are aligned to a single byte using:
				139
				140	typedef uint64_t __attribute__((aligned (1))) triehash_uu64;
				141
				142	=item Byte-order
				143
				144	The macros __BYTE_ORDER__ and __ORDER_LITTLE_ENDIAN__ must be defined.
				145
				146	=back
				147
				148	We forcefully disable multi-byte reads on platforms where the variable
				149	I<__ARM_ARCH> is defined and I<__ARM_FEATURE_UNALIGNED> is not defined,
				150	as there is a measurable overhead from emulating the unaligned reads on
				151	ARM.
				152
				153	=item B<--language=>I<language>
				154
				155	Generate a file in the specified language. Currently known are 'C' and 'tree',
				156	the latter generating a tree.
				157
				158	=item B<--include=>I<header>
				159
				160	Add the header to the include statements of the header file. The value must
				161	be surrounded by quotes or angle brackets for C code. May be specified multiple
				162	times.
				163
				164	=back
				165
				166	=cut
				167
				168	my $unknown = -1;
				169	my $unknown_label = undef;
				170	my $counter_start = 0;
				171	my $enum_name = 'PerfectKey';
				172	my $function_name = 'PerfectHash';
				173	my $enum_class = 0;
				174
				175	my $code_name = '-';
				176	my $header_name = '-';
				177	my $code;
				178	my $header;
				179	my $label_prefix = undef;
				180	my $label_uppercase = 0;
				181	my $ignore_case = 0;
				182	my $multi_byte = '320';
				183	my $language = 'C';
				184	my $counter_name = undef;
				185	my @includes = ();
				186
				187
				188	Getopt::Long::config('default',
				189	'bundling',
				190	'no_getopt_compat',
				191	'no_auto_abbrev',
				192	'permute',
				193	'auto_help');
				194
				195	GetOptions ('code\|C=s' => \$code_name,
				196	'header\|H=s' => \$header_name,
				197	'function-name=s' => \$function_name,
				198	'label-prefix=s' => \$label_prefix,
				199	'label-uppercase' => \$label_uppercase,
				200	'ignore-case' => \$ignore_case,
				201	'enum-name=s' => \$enum_name,
				202	'language\|l=s' => \$language,
				203	'multi-byte=s' => \$multi_byte,
				204	'enum-class' => \$enum_class,
				205	'include=s' => \@includes,
				206	'counter-name=s' => \$counter_name)
				207	or die('Could not parse options!');
				208
				209
				210	# This implements a simple trie. Each node has three attributes:
				211	#
				212	# children - A hash of keys to other nodes
				213	# value - The value to be stored here
				214	# label - A named representation of the value.
				215	#
				216	# The key at each level of the trie can consist of one or more bytes, and the
				217	# trie can be normalized to a form where all keys at a level have the same
				218	# length using rebuild_tree().
				219	package Trie {
				220
				221	sub new {
				222	my $class = shift;
				223	my $self = {};
				224	bless $self, $class;
				225
				226	$self->{children} = {};
				227	$self->{value} = undef;
				228	$self->{label} = undef;
				229
				230	return $self;
				231	}
				232
				233	# Return the largest power of 2 smaller or equal to the argument
				234	sub alignpower2 {
				235	my ($self, $length) = @_;
				236
				237	return 8 if ($length >= 8 && $multi_byte =~ /3/);
				238	return 4 if ($length >= 4 && $multi_byte =~ /2/);
				239	return 2 if ($length >= 2 && $multi_byte =~ /1/);
				240
				241	return 1;
				242	}
				243
				244	# Split the key into a head block and a tail
				245	sub split_key {
				246	my ($self, $key) = @_;
				247	my $length = length $key;
				248	my $split = $self->alignpower2($length);
				249
				250	return (substr($key, 0, $split), substr($key, $split));
				251	}
				252
				253	# Given a key, a label, and a value, insert that into the tree, possibly
				254	# replacing an existing node.
				255	sub insert {
				256	my ($self, $key, $label, $value) = @_;
				257
				258	if (length($key) == 0) {
				259	$self->{label} = $label;
				260	$self->{value} = $value;
				261	return;
				262	}
				263
				264	my ($child, $tail) = $self->split_key($key);
				265
				266	$self->{children}{$child} = Trie->new if (!defined($self->{children}{$child}));
				267
				268	$self->{children}{$child}->insert($tail, $label, $value);
				269	}
				270
				271	# Construct a new trie that only contains words of a given length. This
				272	# is used to split up the common trie after knowing all words, so we can
				273	# switch on the expected word length first, and have the per-trie function
				274	# implement simple longest prefix matching.
				275	sub filter_depth {
				276	my ($self, $togo) = @_;
				277
				278	my $new = Trie->new;
				279
				280	if ($togo != 0) {
				281	my $found = 0;
				282	foreach my $key (sort keys %{$self->{children}}) {
				283	if ($togo > length($key) \|\| defined $self->{children}{$key}->{value}) {
				284	my $child = $self->{children}{$key}->filter_depth($togo - length($key));
				285
				286	$new->{children}{$key}= $child if defined $child;
				287	$found = 1 if defined $child;
				288	}
				289	}
				290	return if (!$found);
				291	} else {
				292	$new->{value} = $self->{value};
				293	$new->{label} = $self->{label};
				294	}
				295
				296	return $new;
				297	}
				298
				299	# (helper for rebuild_tree)
				300	# Reinsert all value nodes into the specified $trie, prepending $prefix
				301	# to their $paths.
				302	sub reinsert_value_nodes_into {
				303	my ($self, $trie, $prefix) = @_;
				304
				305	$trie->insert($prefix, $self->{label}, $self->{value}) if (defined $self->{value});
				306
				307	foreach my $key (sort keys %{$self->{children}}) {
				308	$self->{children}{$key}->reinsert_value_nodes_into($trie, $prefix . $key);
				309	}
				310	}
				311
				312	# (helper for rebuild_tree)
				313	# Find the earliest point to split a key. Normally, we split at the maximum
				314	# power of 2 that is greater or equal than the length of the key. When we
				315	# are building an ASCII-optimised case-insensitive trie that simply ORs
				316	# each byte with 0x20, we need to split at the first ambiguous character:
				317	#
				318	# For example, the words a-bc and a\rbc are identical in such a situation:
				319	# '-' \| 0x20 == '-' == '\r' \| 0x20
				320	# We cannot simply switch on all 4 bytes at once, but need to split before
				321	# the ambiguous character so we can process the ambiguous character on its
				322	# own.
				323	sub find_earlier_split {
				324	my ($self, $key) = @_;
				325
				326	if ($ignore_case) {
				327	for my $i (0..length($key)-1) {
				328	# If the key starts with an ambiguous character, we need to
				329	# take only it. Otherwise, we need to take everything
				330	# before the character.
				331	return $self->alignpower2($i \|\| 1) if (main::ambiguous(substr($key, $i, 1)));
				332	}
				333	}
				334	return $self->alignpower2(length $key);
				335	}
				336
				337	# This rebuilds the trie, splitting each key before ambiguous characters
				338	# as explained in find_earlier_split(), and then chooses the smallest
				339	# such split at each level, so that all keys at all levels have the same
				340	# length (so we can use a multi-byte switch).
				341	sub rebuild_tree {
				342	my $self = shift;
				343	# Determine if/where we need to split before an ambiguous character
				344	my $new_split = 99999999999999999;
				345	foreach my $key (sort keys %{$self->{children}}) {
				346	my $special_length = $self->find_earlier_split($key);
				347	$new_split = $special_length if ($special_length < $new_split);
				348	}
				349
				350	# Start building a new uniform trie
				351	my $newself = Trie->new;
				352	$newself->{label} = $self->{label};
				353	$newself->{value} = $self->{value};
				354	$newself->{children} = {};
				355
				356	foreach my $key (sort keys %{$self->{children}}) {
				357	my $head = substr($key, 0, $new_split);
				358	my $tail = substr($key, $new_split);
				359	# Rebuild the child node at $head, pushing $tail downwards
				360	$newself->{children}{$head} //= Trie->new;
				361	$self->{children}{$key}->reinsert_value_nodes_into($newself->{children}{$head}, $tail);
				362	# We took up to one special character of each key label. There might
				363	# be more, so we need to rebuild recursively.
				364	$newself->{children}{$head} = $newself->{children}{$head}->rebuild_tree();
				365	}
				366
				367	return $newself;
				368	}
				369	}
				370
				371	# Code generator for C and C++
				372	package CCodeGen {
				373	my $static = ($code_name eq $header_name) ? "static " : "";
				374	my $enum_specifier = $enum_class ? "enum class" : "enum";
				375
				376	sub new {
				377	my $class = shift;
				378	my $self = {};
				379	bless $self, $class;
				380
				381	return $self;
				382	}
				383
				384	sub open_output {
				385	my $self = shift;
				386	if ($code_name ne '-') {
				387	open($code, '>', $code_name) or die "Cannot open $code_name: $!" ;
				388	} else {
				389	$code = *STDOUT;
				390	}
				391	if($code_name eq $header_name) {
				392	$header = $code;
				393	} elsif ($header_name ne '-') {
				394	open($header, '>', $header_name) or die "Cannot open $header_name: $!" ;
				395	} else {
				396	$header = *STDOUT;
				397	}
				398	}
				399
				400	sub mangle_label {
				401	my ($self, $label) = @_;
				402
				403	$label = $label_prefix . $label if defined($label_prefix);
				404	$label = uc $label if $label_uppercase;
				405
				406	return $label;
				407	}
				408
				409	sub word_to_label {
				410	my ($self, $word) = @_;
				411
				412	$word =~ s/_/__/g;
				413	$word =~ s/-/_/g;
				414
				415	return $self->mangle_label($word);
				416	}
				417
				418	# Return a case label, by shifting and or-ing bytes in the word
				419	sub case_label {
				420	my ($self, $key) = @_;
				421
				422	return sprintf("'%s'", substr($key, 0, 1)) if not $multi_byte;
				423
				424	my $output = '0';
				425
				426	for my $i (0..length($key)-1) {
				427	$output .= sprintf("\| onechar('%s', %d, %d)", substr($key, $i, 1), 8 * $i, 8*length($key));
				428	}
				429
				430	return $output;
				431	}
				432
				433	# Return an appropriate read instruction for $length bytes from $offset
				434	sub switch_key {
				435	my ($self, $offset, $length) = @_;
				436
				437	return "string[$offset]" if $length == 1;
				438	return sprintf("((triehash_uu%s) &string[$offset])", $length * 8);
				439	}
				440
				441	# Render the trie so that it matches the longest prefix.
				442	sub print_table {
				443	my ($self, $trie, $fh, $indent, $index) = @_;
				444	$indent //= 0;
				445	$index //= 0;
				446
				447	# If we have children, try to match them.
				448	if (%{$trie->{children}}) {
				449	# The difference between lowercase and uppercase alphabetical characters
				450	# is that they have one bit flipped. If we have alphabetical characters
				451	# in the search space, and the entire search space works fine if we
				452	# always turn on the flip, just OR the character we are switching over
				453	# with the bit.
				454	my $want_use_bit = 0;
				455	my $can_use_bit = 1;
				456	my $key_length = 0;
				457	foreach my $key (sort keys %{$trie->{children}}) {
				458	$can_use_bit &= not main::ambiguous($key);
				459	$want_use_bit \|= ($key =~ /^[a-zA-Z]+$/);
				460	$key_length = length($key);
				461	}
				462
				463	if ($ignore_case && $can_use_bit && $want_use_bit) {
				464	printf { $fh } ((' ' x $indent) . "switch(%s \| 0x%s) {\n", $self->switch_key($index, $key_length), '20' x $key_length);
				465	} else {
				466	printf { $fh } ((' ' x $indent) . "switch(%s) {\n", $self->switch_key($index, $key_length));
				467	}
				468
				469	my $notfirst = 0;
				470	foreach my $key (sort keys %{$trie->{children}}) {
				471	if ($notfirst) {
				472	printf { $fh } (' ' x $indent . " break;\n");
				473	}
				474	if ($ignore_case) {
				475	printf { $fh } (' ' x $indent . "case %s:\n", $self->case_label(lc($key)));
				476	printf { $fh } (' ' x $indent . "case %s:\n", $self->case_label(uc($key))) if lc($key) ne uc($key) && !($can_use_bit && $want_use_bit);
				477	} else {
				478	printf { $fh } (' ' x $indent . "case %s:\n", $self->case_label($key));
				479	}
				480
				481	$self->print_table($trie->{children}{$key}, $fh, $indent + 1, $index + length($key));
				482
				483	$notfirst=1;
				484	}
				485
				486	printf { $fh } (' ' x $indent . "}\n");
				487	}
				488
				489
				490	# This node has a value, so it is a possible end point. If no children
				491	# matched, we have found our longest prefix.
				492	if (defined $trie->{value}) {
				493	printf { $fh } (' ' x $indent . "return %s;\n", ($enum_class ? "${enum_name}::" : '').$trie->{label});
				494	}
				495
				496	}
				497
				498	sub print_words {
				499	my ($self, $trie, $fh, $indent, $sofar) = @_;
				500
				501	$indent //= 0;
				502	$sofar //= '';
				503
				504
				505	printf { $fh } (' ' x $indent."%s = %s,\n", $trie->{label}, $trie->{value}) if defined $trie->{value};
				506
				507	foreach my $key (sort keys %{$trie->{children}}) {
				508	$self->print_words($trie->{children}{$key}, $fh, $indent, $sofar . $key);
				509	}
				510	}
				511
				512	sub print_functions {
				513	my ($self, $trie, %lengths) = @_;
				514	foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
				515	print { $code } ("static enum ${enum_name} ${function_name}${local_length}(const char *string)\n");
				516	print { $code } ("{\n");
				517	$self->print_table($trie->filter_depth($local_length)->rebuild_tree(), $code, 1);
				518	printf { $code } (" return %s$unknown_label;\n", ($enum_class ? "${enum_name}::" : ''));
				519	print { $code } ("}\n");
				520	}
				521	}
				522
				523	sub main {
				524	my ($self, $trie, $num_values, %lengths) = @_;
				525	print { $header } ("#ifndef TRIE_HASH_${function_name}\n");
				526	print { $header } ("#define TRIE_HASH_${function_name}\n");
				527	print { $header } ("#include <stddef.h>\n");
				528	print { $header } ("#include <stdint.h>\n");
				529	foreach my $include (@includes) {
				530	print { $header } ("#include $include\n");
				531	}
				532	printf { $header } ("enum { $counter_name = $num_values };\n") if (defined($counter_name));
				533	print { $header } ("${enum_specifier} ${enum_name} {\n");
				534	$self->print_words($trie, $header, 1);
				535	printf { $header } (" $unknown_label = $unknown,\n");
				536	print { $header } ("};\n");
				537	print { $header } ("${static}enum ${enum_name} ${function_name}(const char *string, size_t length);\n");
				538
				539	print { $code } ("#include \"$header_name\"\n") if ($header_name ne $code_name);
				540
				541	if ($multi_byte) {
				542	print { $code } ("#ifdef __GNUC__\n");
				543	foreach my $i ((16, 32, 64)) {
				544	print { $code } ("typedef uint${i}_t __attribute__((aligned (1))) triehash_uu${i};\n");
				545	print { $code } ("typedef char static_assert${i}[__alignof__(triehash_uu${i}) == 1 ? 1 : -1];\n");
				546	}
				547
				548	print { $code } ("#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__\n");
				549	print { $code } ("#define onechar(c, s, l) (((uint64_t)(c)) << (s))\n");
				550	print { $code } ("#else\n");
				551	print { $code } ("#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s))\n");
				552	print { $code } ("#endif\n");
				553	print { $code } ("#if (!defined(__ARM_ARCH) \|\| defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE)\n");
				554	print { $code } ("#define TRIE_HASH_MULTI_BYTE\n");
				555	print { $code } ("#endif\n");
				556	print { $code } ("#endif /GNUC /\n");
				557
				558	print { $code } ("#ifdef TRIE_HASH_MULTI_BYTE\n");
				559	$self->print_functions($trie, %lengths);
				560	$multi_byte = 0;
				561	print { $code } ("#else\n");
				562	$self->print_functions($trie, %lengths);
				563	print { $code } ("#endif /* TRIE_HASH_MULTI_BYTE */\n");
				564	} else {
				565	$self->print_functions($trie, %lengths);
				566	}
				567
				568	print { $code } ("${static}enum ${enum_name} ${function_name}(const char *string, size_t length)\n");
				569	print { $code } ("{\n");
				570	print { $code } (" switch (length) {\n");
				571	foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
				572	print { $code } (" case $local_length:\n");
				573	print { $code } (" return ${function_name}${local_length}(string);\n");
				574	}
				575	print { $code } (" default:\n");
				576	printf { $code } (" return %s$unknown_label;\n", ($enum_class ? "${enum_name}::" : ''));
				577	print { $code } (" }\n");
				578	print { $code } ("}\n");
				579
				580	# Print end of header here, in case header and code point to the same file
				581	print { $header } ("#endif /* TRIE_HASH_${function_name} */\n");
				582	}
				583	}
				584
				585	# A character is ambiguous if the 1<<5 (0x20) bit does not correspond to the
				586	# lower case bit. A word is ambiguous if any character is. This definition is
				587	# used to check if we can perform the \|0x20 optimization when building a case-
				588	# insensitive trie.
				589	sub ambiguous {
				590	my $word = shift;
				591
				592	foreach my $char (split //, $word) {
				593	# If 0x20 does not solely indicate lowercase, it is ambiguous
				594	return 1 if ord(lc($char)) != (ord($char) \| 0x20);
				595	return 1 if ord(uc($char)) != (ord($char) & ~0x20);
				596	}
				597
				598	return 0;
				599	}
				600
				601	sub build_trie {
				602	my $codegen = shift;
				603	my $trie = Trie->new;
				604
				605	my $counter = $counter_start;
				606	my $prev_value;
				607	my %lengths;
				608
				609	open(my $input, '<', $ARGV[0]) or die "Cannot open $ARGV[0]: $!";
				610	while (my $line = <$input>) {
				611	my ($label, $word, $value) = $line =~ m{
				612	(?:\s([^~\s]+)\s~)? # Label ~
				613	(?:\s*([^~=\s]+))? # Word
				614	(?:\s=\s([^\s]+)\s+)? # = Value
				615	\s*
				616	}x;
				617
				618	if (defined $word) {
				619	$label //= $codegen->word_to_label($word);
				620	$value //= defined $prev_value ? $prev_value + 1 : 0;
				621
				622	$trie->insert($word, $label, $value);
				623	$lengths{length($word)} = 1;
				624	} elsif (defined $value) {
				625	$unknown = $value;
				626	$unknown_label = $codegen->mangle_label($label) if defined $label;
				627	} else {
				628	die "Invalid line: $line";
				629	}
				630
				631	$prev_value = $value;
				632	$counter = $value + 1 if $value >= $counter;
				633	}
				634
				635	$unknown_label //= $codegen->mangle_label('Unknown');
				636
				637	return ($trie, $counter, %lengths);
				638	}
				639
				640	# Generates an ASCII art tree
				641	package TreeCodeGen {
				642
				643	sub new {
				644	my $class = shift;
				645	my $self = {};
				646	bless $self, $class;
				647
				648	return $self;
				649	}
				650
				651	sub mangle_label {
				652	my ($self, $label) = @_;
				653	return $label;
				654	}
				655
				656	sub word_to_label {
				657	my ($self, $word) = @_;
				658	return $word;
				659	}
				660
				661	sub main {
				662	my ($self, $trie, $counter, %lengths) = @_;
				663	printf { $code } ("┌────────────────────────────────────────────────────┐\n");
				664	printf { $code } ("│ Initial trie │\n");
				665	printf { $code } ("└────────────────────────────────────────────────────┘\n");
				666	$self->print($trie);
				667	printf { $code } ("┌────────────────────────────────────────────────────┐\n");
				668	printf { $code } ("│ Rebuilt trie │\n");
				669	printf { $code } ("└────────────────────────────────────────────────────┘\n");
				670	$self->print($trie->rebuild_tree());
				671
				672	foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
				673	printf { $code } ("┌────────────────────────────────────────────────────┐\n");
				674	printf { $code } ("│ Trie for words of length %-4d │\n", $local_length);
				675	printf { $code } ("└────────────────────────────────────────────────────┘\n");
				676	$self->print($trie->filter_depth($local_length)->rebuild_tree());
				677	}
				678	}
				679
				680	sub open_output {
				681	my $self = shift;
				682	if ($code_name ne '-') {
				683	open($code, '>:encoding(utf8)', $code_name) or die "Cannot open $ARGV[0]: $!" ;
				684	} else {
				685	$code = *STDOUT;
				686	binmode($code, ':encoding(utf8)');
				687	}
				688	}
				689
				690	# Print a trie
				691	sub print {
				692	my ($self, $trie, $depth) = @_;
				693	$depth //= 0;
				694
				695	print { $code } (' → ') if defined($trie->{label});
				696	print { $code } ($trie->{label} // '', "\n");
				697	foreach my $key (sort keys %{$trie->{children}}) {
				698	print { $code } ('│ ' x ($depth), "├── $key");
				699	$self->print($trie->{children}{$key}, $depth + 1);
				700	}
				701	}
				702	}
				703
				704	my %codegens = (
				705	C => 'CCodeGen',
				706	tree => 'TreeCodeGen',
				707	);
				708
				709
				710	defined($codegens{$language}) or die "Unknown language $language. Valid choices: ", join(', ', keys %codegens);
				711	my $codegen = $codegens{$language}->new();
				712	my ($trie, $counter, %lengths) = build_trie($codegen);
				713
				714	$codegen->open_output();
				715	$codegen->main($trie, $counter, %lengths);
				716
				717
				718	=head1 LICENSE
				719
				720	triehash is available under the MIT/Expat license, see the source code
				721	for more information.
				722
				723	=head1 AUTHOR
				724
				725	Julian Andres Klode <jak@jak-linux.org>
				726
				727	=cut
				728