1
0
mirror of https://github.com/koenkooi/foo2zjs.git synced 2026-01-22 03:34:49 +08:00
foo2zjs/msexpand
Koen Kooi 66819e6d05 import 'Tarball last modified: Fri Jan 22 05:14:24 2016 CST'
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
2016-01-25 16:22:04 +01:00

184 lines
5.6 KiB
Perl
Executable File

#!/usr/bin/perl
# msexpand written by Paul Laufer, 2001, to help him learn perl ;)
# - added fillup with zeros if expanded file is less than original;
# - added binmode() to work on platforms which need it; by G. Knauf.
# last change: 10-May-2002 gk.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Microsoft compress file structure:
# Uses a 12 bit (4k) sliding window Lempel Ziv variant.
#
# header: 14 bytes
# int MAGIC1 = 0x44445a53; // "SZDD"
# int MAGIC2 = 0x3327f088;
# char MAGIC3 = 0x41;
# char last_char_of_filename; // offset 0x09
# unsigned short int size_low;
# unsigned short int size_high;
#
# data portion:
# Flag byte followed by eight data elements, for each of the eight flag bits.
# If a flag bit is 1, the corresponding element is a data byte (just copied).
# If the flag bit is 0, the corresponding element is a code, comprised of two
# bytes. The first byte holds the low 8 bits of the 12 bit offset into the 4k
# window, the upper 4 bits of the second byte hold the high 4 bits of that
# offset, and the lower 4 bits of the second byte are the length of the
# string, minus 3. Thus the string length is between 3 and 18 bytes.
# Flag byte Eight data elements
# 10010011
# |||||||+----byte
# ||||||+-----byte
# |||||+------code
# ||||+-------code
# |||+--------byte
# ||+---------code
# |+----------code
# +-----------byte
#
# The window is wrapped, ie: with an offset of 4092 and a length of 10, you
# will get the last 4 bytes then the first 6 bytes of the window in the output.
#
# Of course, the window must be updated with expanded strings and bytes as they
# are read. Oh, yeah, the window needs to be initialized with spaces, not
# zeros.
# Expected values of the three magic fields at the start of the header.
($MAGIC1, $MAGIC2, $MAGIC3) = (0x44445a53, 0x3327f088, 0x41);
# Sizes, in bytes, of the sliding window and of the file header.
($WINSIZE, $HEADSIZE) = (4096, 14);
# Decode the string length stored in the second byte of a code.
# The low nibble holds (length - 3), so the result is in 3..18.
sub LENGTH {
    my ($code_byte) = @_;
    my $stored = $code_byte & 0x0F;
    return $stored + 3;
}
# Decode the window offset of a code from its two bytes.
# The first byte supplies the low 8 bits and the high nibble of the second
# byte supplies bits 8..11; 0x10 is then added and the sum is reduced to
# 12 bits.
sub OFFSET {
    my ($low_byte, $high_byte) = @_;
    my $high_bits = ($high_byte & 0xF0) << 4;
    my $biased    = $high_bits + $low_byte + 0x0010;
    return $biased & 0x0FFF;
}
# Wrap a position into the window by masking it with ($WINSIZE - 1).
sub WRAPFIX {
    my ($position) = @_;
    my $mask = $WINSIZE - 1;
    return $position & $mask;
}
# Tell whether bit number $bit (0 = least significant) of $byte is set.
# Returns a true value when it is set and a false value otherwise.
sub BITSET {
    my ($value, $bit_no) = @_;
    my $shifted = $value >> $bit_no;
    return (($shifted & 1) == 1);
}
# This sub directly translated from my C source. Runs slow in perl...
#
# Expands the LZ data that follows the file header and writes the expanded
# bytes to the OUTFILE handle.
#
#   $input       - the whole compressed file, header included, as a byte
#                  string
#   $size_uncomp - expanded size announced by the header; no new flag byte
#                  is started once this many bytes have been produced
#
# Returns the number of bytes written to OUTFILE.
sub LZ_expand {
    my ($input, $size_uncomp) = @_;
    my $size_in = length($input);    # end of the compressed data

    # The window stores byte values (numbers), because every byte is emitted
    # through chr(). It starts out filled with spaces, so it must hold 0x20
    # and not the string ' ', which would be emitted as a NUL byte.
    my @window   = (0x20) x $WINSIZE;
    my $curr_pos = 0;                # number of bytes produced so far
    my $index    = $HEADSIZE;        # read position, just past the header

    while ($curr_pos < $size_uncomp && $index < $size_in) {
        my $bit_map = unpack('C', substr($input, $index, 1));
        $index++;
        for (my $counter = 0; $counter < 8; $counter++) {
            # Input exhausted: the remaining flag bits carry no data.
            return $curr_pos if $index >= $size_in;
            if (!BITSET($bit_map, $counter)) {
                # Its a code, so process. A code needs two bytes; a single
                # left-over byte means the input is truncated.
                return $curr_pos if $index + 2 > $size_in;
                my ($byte1, $byte2) =
                    unpack('CC', substr($input, $index, 2));
                $index += 2;
                my $length   = LENGTH($byte2);
                my $location = OFFSET($byte1, $byte2);
                # Copy one byte at a time: the source range may overlap the
                # bytes being written, and both positions wrap in the window.
                while ($length > 0) {
                    my $byte = $window[WRAPFIX($location)];
                    $window[WRAPFIX($curr_pos)] = $byte;
                    print OUTFILE chr($byte);
                    $curr_pos++;
                    $location++;
                    $length--;
                }
            } else {
                # Its just a data byte
                my $byte = unpack('C', substr($input, $index, 1));
                $index++;
                $window[WRAPFIX($curr_pos)] = $byte;
                print OUTFILE chr($byte);
                $curr_pos++;
            }
        }
    }
    return $curr_pos;
}
# start here
#
# Command line driver: reads the compressed file named by the first argument,
# checks its header, derives the output file name by replacing the last
# character of the input name with the character stored in the header,
# expands the data into that file and reports the sizes.
if (@ARGV < 1) {
    print "Microsoft Compressed File Expander\n";
    print "Written by Paul Laufer 2001-03-13\n\n";
    print "Usage:\n\tmsexpand.pl FILE\n\n";
    print "Where FILE is a valid Microsoft Compressed file. Files of this type\nusually have the last letter replaced with an underscore, ie rmquasar.vx_.\nThe expanded file will be the original filename with the last underscore\nreplaced with the original letter, ie rmquasar.vxd.\n\n";
    exit(1);
}

# Three-argument open, so that no character of the file name can be taken
# for an open mode.
open(INFILE, '<', $ARGV[0])
    or die "Can't open file for input: $ARGV[0]: $!\n";
binmode(INFILE);
# Slurp the whole file; the record separator is only changed locally.
$input = do { local $/; <INFILE> };
close(INFILE);
$input = '' unless defined $input;

# The header is stored little-endian on disk, so use the fixed-size
# little-endian templates (V = 32 bit, v = 16 bit) instead of the native
# 'I' and 'S', whose size and byte order depend on the host.
($magic1, $magic2, $magic3, $lastchar, $size_low, $size_high) =
    unpack 'VVCa1vv', substr($input, 0, $HEADSIZE);

if (   length($input) < $HEADSIZE
    || $magic1 != $MAGIC1
    || $magic2 != $MAGIC2
    || $magic3 != $MAGIC3)
{
    print "Error: Input file is not a Microsoft Compress format.\n";
    exit(1);
} else {
    print "Input file appears to be Microsoft Compress format, proceeding\n";
}

$outfile = $ARGV[0];
substr($outfile, -1, 1) = $lastchar;
print "Output filename = ", $outfile, "\n";

$size_uncomp = ($size_high << 0x10) + $size_low;
# Size of the compressed input; LZ_expand may read this global.
$size_orig = length($input);
print "Original file size: ", $size_uncomp, " bytes\n";
if ($size_uncomp > 0) {
    printf "Compression ratio: %.1f%%\n", $size_orig*100/$size_uncomp;
} else {
    # A header announcing zero bytes would otherwise divide by zero.
    print "Compression ratio: n/a\n";
}

open(OUTFILE, '>', $outfile)
    or die "Can't open file for output: $outfile: $!\n";
binmode(OUTFILE);

# Time to start expanding the file
$size_expand = LZ_expand($input, $size_uncomp);
printf("Expanded size: %d bytes\n", $size_expand);

# Fill up with zeros if the expanded data is shorter than the header says.
print OUTFILE "\x0" x ($size_uncomp - $size_expand)
    if ($size_expand < $size_uncomp);

# Buffered write errors only show up when the handle is closed.
close(OUTFILE) or die "Can't write output file: $outfile: $!\n";
printf("Output file size: %d bytes\n", (stat($outfile))[7]);