#!/opt/bin/perl

# the generated tables mostly have NOT been checked so far!

use v5.8.2;
use utf8;
use Encode;
use Encode::JP;
use Encode::CN;
use Encode::KR;
use Encode::HanExtra;
use Encode::JIS2K;

my $gen;

$TO_UNICODE = 0; # also generate to_unicode tables

sub linear {
   my ($a, $l, $h, $b) = @_;
   for ($l .. $h) {
      return unless defined $a->[$_] && $a->[$_] == $_ + ($b -  $l);
   }
   1;
}

sub wrap {
   my $res = "  ";
   $res .= "$1\n  " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
   $res .= "$1" if $_[0] =~ /\G(.*)$/;
   $res;
}

my $last_tab_full; # hack

sub gentab {
   my ($enc, $l, $h, $f, $rep) = @_;
   $last_tab_full = 0;

   " = {\n"
   . (wrap join ", ",
                            map +(sprintf "0x$f",
                                          defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
                                 ), $l..$h)
   . "\n};\n";
}

sub gen {
   my ($enc, $base, $ch) = @_;

   my ($fun, $tab);
   my (@t1, @t2);

   for (255, 159, 127, 126, 125) {
      if (linear $enc, 0, $_, 0) {
         undef $enc->[$_] for 0..$_;
         $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
      }
   }

   for (126, 127, 128) {
      if (linear $enc, $_, 159, $_) {
         undef $enc->[$_] for $_..159;
         $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
      }
   }

   for (126, 127) {
      next unless defined $enc->[$_];
      $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
   }

   my @map;
   my @map2;

   for (0 .. $#$enc) {
      if (defined $enc->[$_]) {
         $map[$_] = $enc->[$_];
         $map2[$_ >> 8]++;
      }
   }

   for my $p (0..255) {
      if ($map2[$p]) {
         my $b = $p << 8;

         my ($l, $h);
         for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
         for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }

         if ($map2[$p] <= 5) {
            for ($l .. $h) {
               next unless defined $enc->[$b + $_];
               $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
            }
         } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
            my $dif = $enc->[$b + $l] - ($b + $l);
            $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
            $fun .= sprintf "if (· <= 0x%04x && 0x%04x <= ·) return · %s;\n", $b + $l, $b + $h, $dif;
         } elsif ($map2[$p] <= 5) { # defunct
            $fun .= "switch (·)\n  {\n";
            for ($l .. $h) {
               next unless defined $enc->[$b + $_];
               $fun .= sprintf "    case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
            }
            $fun .= "  }\n";
         } else {
            my ($i, $t, $f) = (0, "uint8_t", "%02x");
            for ($l .. $h) {
               if ($enc->[$b + $_] > 255) {
                  ($i, $t, $f) = (1, "uint16_t", "%04x");
                  last;
               }
            }

            $i ? push @t2, [$p, $l, $h, $t, $f]
               : push @t1, [$b, $l, $h, $t, $f];
         }

      }
   }

   if (@t2 <= 3) {
      push @t1, @t2;
      @t2 = ();
   }

   for (@t1) {
      my ($b, $l, $h, $t, $f) = @$_;
      my $rep = $b + $l == 0 ? 128 : 0;
      $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
      $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
                    . "  return %s$base\_$b\[· - 0x%04x];\n",
                      $b + $l, $b + $h,
                      ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
                      $b + $l;
   }

   if (@t2) {
      my ($min, $max) = (255, 0);
      my ($l, $h) = (255, 0);
      for (@t2) {
         my ($p, $L, $H, $t, $f) = @$_;
         $max = $p if $p > $max;
         $min = $p if $p < $min;
         $l = $L if $L < $l;
         $h = $H if $H > $h;
      }

      $fun .= "uint8_t  l = ·;\n";
      $fun .= "uint16_t h = · >> 8;\n";

      if ($h - $l < 200) {
         my $d = $h - $l + 1;

         my @enc;
         for $p ($min .. $max) {
            for $i ($l .. $h) {
               $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
            }
         }
         $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";

         if ($last_tab_full) {
            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . "    && 0x%02x <= l && l <= 0x%02x)\n"
                          . "  return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                          . "         ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                          . "         : NOCHAR;\n",
                            $min, $max, $l, $h,
                            ($d, $min * $d + $l) x 2;
         } else {
            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . "    && 0x%02x <= l && l <= 0x%02x)\n"
                          . "  return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
                            $min, $max, $l, $h,
                            $d,
                            $min * $d + $l;
         }
      } else {
         my @tab = (0) x ($max - $min);
         for (@t2) {
            my ($p, undef, undef, $t, $f) = @$_;
            $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
            $tab[$p - $min] = "$base\_$p";
         }

         $tab .= "const uint16_t *$base\_i[] = {\n"
                 . (wrap join ", ", @tab)
                 . "\n};\n\n";

         $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                       . "    && 0x%02x <= l && l <= 0x%02x\n"
                       . "    && $base\_i[h - 0x%02x])\n"
                       . "  return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                       . "         ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                       . "         : NOCHAR;\n",
                         $min, $max, $l, $h,
                         $min, ($min, $l) x 2;
      }
   }

   $fun .= "return NOCHAR;\n";

   $fun =~ s/·/$ch/g;

   ($tab, $fun);
}

while (<DATA>) {
   my ($group, $base, $cs, $type) = split /\s+/;

   my @enc1;
   my @enc2;

   for (0 .. 65535) {
      my $enc = encode $cs, (chr $_), Encode::FB_QUIET;

      if (length $enc) {
         my $code = hex unpack "H*", $enc;
         $enc1[$_] = $code;
         $enc2[$code] = $_ unless defined $enc2[$code];
      }
   }

   my ($tab1, $fun1);
   my ($tab2, $fun2);

   ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
   ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;

   $fun1 =~ s/^/    /gm;
   $fun2 =~ s/^/    /gm;

   print "$base\n";
   open OUT, ">table/$base.h" or die;

   print OUT <<EOF;
//
// AUTOMATICALLLY GENERATED by gentables
//
#ifdef ENCODING_$group

$tab1$tab2
static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
$fun1}
EOF

if ($TO_UNICODE) {
   print OUT <<EOF
#if ENCODING_TO_UNICODE
static unicode_t cs_$base\_to_unicode (uint32_t enc) {
$fun2}
#endif
EOF
}

print OUT <<EOF;

#else

#define cs_$base\_from_unicode cs_unknown_from_unicode
#define cs_$base\_to_unicode   cs_unknown_to_unicode

#endif
EOF

   close OUT;
}

__DATA__
VN		viscii			viscii
KR		ksc5601_1987_0		ksc5601-raw
ZH		gb2312_1980_0		gb2312-raw
ZH		gbk_0			gbk
ZH_EXT		cns11643_1992_1		cns11643-1
ZH_EXT		cns11643_1992_2		cns11643-2
ZH_EXT		cns11643_1992_3		cns11643-3
ZH_EXT		cns11643_1992_4		cns11643-4
ZH_EXT		cns11643_1992_5		cns11643-5
ZH_EXT		cns11643_1992_6		cns11643-6
ZH_EXT		cns11643_1992_7		cns11643-7
ZH_EXT		cns11643_1992_f		cns11643-f
ZH		big5			big5
ZH_EXT		big5_ext		big5ext
ZH_EXT		big5_plus		big5plus
EU		koi8_r			koi8-r
EU		koi8_u			koi8-u
DEFAULT		iso8859_1		iso-8859-1
EU		iso8859_2		iso-8859-2
EU		iso8859_3		iso-8859-3
EU		iso8859_4		iso-8859-4
EU		iso8859_5		iso-8859-5
EU		iso8859_6		iso-8859-6
EU		iso8859_7		iso-8859-7
EU		iso8859_8		iso-8859-8
EU		iso8859_9		iso-8859-9
EU		iso8859_10		iso-8859-10
EU		iso8859_11		iso-8859-11
EU		iso8859_13		iso-8859-13
EU		iso8859_14		iso-8859-14	
DEFAULT		iso8859_15		iso-8859-15
EU		iso8859_16		iso-8859-16
JP		jis0201_1976_0		jis0201-raw
JP		jis0208_1990_0		jis0208-raw
JP		jis0212_1990_0		jis0212-raw
JP_EXT		jis0213_1		jis0213-1-raw
JP_EXT		jis0213_2		jis0213-2-raw
