解压缩的代码树中examples\c\appack.c例程演示如何使用该库执行压缩和解压缩,该程序很简单,我们就从它开始分析吧。
C:\Documents and Settings\Administrator\My Documents\下载\aPLib-1.01\examples\c>
appack c 1.doc p1.doc
===============================================================================
aPLib example Copyright (c) 1998-2009 by Joergen Ibsen / Jibz
All Rights Reserved
===============================================================================
compressed 110339 -> 36923 bytes (33%) in 0.05 seconds
这里显示压缩率是33%,
注意有可能压缩率会大于100%,文件大小不减反增,比较奇怪
对于已经压缩过的文件测试如下
C:\Documents and Settings\Administrator\My Documents\下载\aPLib-1.01\examples\c>
appack c 1.exe p1.exe
===============================================================================
aPLib example Copyright (c) 1998-2009 by Joergen Ibsen / Jibz
All Rights Reserved
===============================================================================
compressed 1545096 -> 1663141 bytes (107%) in 0.56 seconds
/*
* Show program syntax.
*/
void show_syntax(void)
{
printf("syntax:\n\n"
" compress : appack c
" decompress : appack d
}
/*
* Main.
*/
int main(int argc, char *argv[])
{
/* show banner */
printf("===============================================================================\n"
"aPLib example Copyright (c) 1998-2009 by Joergen Ibsen / Jibz\n"
" All Rights Reserved\n\n"
" \n"
"===============================================================================\n\n");
/* check number of arguments */
if (argc != 4)//参数个数必须为4
{
show_syntax();
return 1;
}
#ifdef __WATCOMC__
/* OpenWatcom 1.2 line buffers stdout, so we unbuffer stdout manually
to make the progress indication in the callback work.
*/
setbuf(stdout, NULL);
#endif
/* check first character of first argument to determine action */
if (argv[1][0] && argv[1][1] == '\0')
{
switch (argv[1][0])
{
/* compress file */
case 'c':
case 'C': return compress_file(argv[2], argv[3]);
/* decompress file */
case 'd':
case 'D': return decompress_file(argv[2], argv[3]);
}
}
/* show program syntax */
show_syntax();
return 1;
}
先看压缩算法
/*
* Unsigned char type.
*/
typedef unsigned char byte;
/*
* Compute ratio between two numbers.
*/
unsigned int ratio(unsigned int x, unsigned int y)
{
if (x <= UINT_MAX / 100) x *= 100; else y /= 100;
if (y == 0) y = 1;
return x / y;
}
/*
* Compression callback.
*/
int CB_CALLCONV callback(unsigned int insize, unsigned int inpos, unsigned int outpos, void *cbparam)
{
printf("\rcompressed %u -> %u bytes (%u%% done)", inpos, outpos, ratio(inpos, insize));//在原行显示压缩进度
return 1;
}
/*
* Compress a file.
*/
int compress_file(const char *oldname, const char *packedname)
{
FILE *oldfile;
FILE *packedfile;
unsigned int insize, outsize;
clock_t clocks;
byte *data, *packed, *workmem;
/* open input file */
if ((oldfile = fopen(oldname, "rb")) == NULL)
{
printf("\nERR: unable to open input file\n");
return 1;
}
/* get size of input file 取得输入文件大小,使用stat不是更好,还是因为考虑到兼容性?*/
fseek(oldfile, 0, SEEK_END);
insize = (unsigned int) ftell(oldfile);
fseek(oldfile, 0, SEEK_SET);
/* allocate memory 分配内存*/
if ((data = (byte *) malloc(insize)) == NULL ||
(packed = (byte *) malloc(aP_max_packed_size(insize))) == NULL ||
(workmem = (byte *) malloc(aP_workmem_size(insize))) == NULL)
{
printf("\nERR: not enough memory\n");
return 1;
}
if (fread(data, 1, insize, oldfile) != insize)//全部读入输入文件
{
printf("\nERR: error reading from input file\n");
return 1;
}
clocks = clock();//计时
/* compress data block 压缩数据 */
outsize = aPsafe_pack(data, packed, insize, workmem, callback, NULL);
clocks = clock() - clocks;
/* check for compression error */
if (outsize == APLIB_ERROR)
{
printf("\nERR: an error occured while compressing\n");
return 1;
}
/* create output file */
if ((packedfile = fopen(packedname, "wb")) == NULL)
{
printf("\nERR: unable to create output file\n");
return 1;
}
fwrite(packed, 1, outsize, packedfile);//写入输出文件
/* show result */
printf("\rcompressed %u -> %u bytes (%u%%) in %.2f seconds\n",
insize, outsize, ratio(outsize, insize),
(double)clocks / (double)CLOCKS_PER_SEC);//在原行显示报告
/* close files */
fclose(packedfile);
fclose(oldfile);
/* free memory */
free(workmem);
free(packed);
free(data);
return 0;
}
逻辑很清晰.回调函数用于显示压缩进度,\r代表回车,即在原行显示。
回调函数原型是
int CB_CALLCONV callback(unsigned int insize, unsigned int inpos, unsigned int outpos, void *cbparam)
主要参数是源文件大小,当前输入位置,当前输出位置.
这里用到的三个函数
aP_max_packed_size
aP_workmem_size
aPsafe_pack
都是aPLib中的.
先看aP_max_packed_size的定义,这个函数没有提供源码,只能用IDA反汇编
aPLib-1.01\lib\dll\aplib.dll
来看
; __stdcall aP_max_packed_size(x)
public _aP_max_packed_size@4
_aP_max_packed_size@4 proc near
arg_0= dword ptr 4
mov eax, [esp+arg_0] ; _aP_max_packed_size
mov ecx, eax
shr ecx, 3
lea eax, [ecx+eax+40h]
retn 4
_aP_max_packed_size@4 endp
这个函数很简单,返回insize>>3+insize+40h
aP_workmem_size
; __stdcall aP_workmem_size(x)
public _aP_workmem_size@4
_aP_workmem_size@4 proc near
mov eax, 0A0000h ; _aP_workmem_size
retn 4
_aP_workmem_size@4 endp
直接返回0a0000h.
aPsafe_pack倒是提供了汇编源码,在aPLib-1.01\src\32bit\spack.asm中
; header format:
;压缩文件头部格式
; offs size data
; --------------------------------------
; 0 dword tag ('AP32')
; 4 dword header_size (24 bytes)
; 8 dword packed_size
; 12 dword packed_crc
; 16 dword orig_size
; 20 dword orig_crc
format MS COFF
public aPsafe_pack as '_aPsafe_pack'
extrn '_aP_pack' as aP_pack
extrn '_aP_crc32' as aP_crc32
; =============================================================
section '.text' code readable executable
aPsafe_pack:
; aPsafe_pack(const void *source,
; void *destination,
; unsigned int length,
; void *workmem,
; int (*callback)(unsigned int, unsigned int, void *),
; void *cbparam)
.ret$ equ 7*4
.src$ equ 8*4 + 4
.dst$ equ 8*4 + 8
.len$ equ 8*4 + 12
.wmem$ equ 8*4 + 16
.cb$ equ 8*4 + 20
.cbp$ equ 8*4 + 24
/*
按顺序EAX、ECX、EDX、EBX、ESP、EBP、ESI 及 EDI,然後將堆疊指標值減去三十二
可见
.ret$ equ 7*4指向压入的eax处
8*4 指向返回地址
.src$ equ 8*4 + 4 指向inbuffer
等等
*/
pushad
mov ebp, esp
mov esi, [ebp + .src$] ; esi -> inbuffer
mov edi, [ebp + .dst$] ; edi -> outbuffer
mov ecx, [ebp + .len$] ; ecx = length
or eax, -1 ; eax = -1,错误值
test esi, esi ; check parameters
jz .return_eax ;
test edi, edi ;
jz .return_eax ;
test ecx, ecx ;
jz .return_eax ;
mov ebx, 032335041h ;压缩文件头部magic,'AP32'
mov [edi], ebx ; set header.tag
mov ebx, 24 ;头部大小
mov [edi + 4], ebx ; set header.header_size
add ebx, edi ; ebx -> destination for packed data,ebx指向头部后
mov [edi + 16], ecx ; set header.orig_size,原始文件大小
push ecx
push esi
call aP_crc32 ;调用aP_crc32,计算原始文件crc32值
add esp, 8 ;平栈
mov [edi + 20], eax ; set header.orig_crc,原始crc
push dword [ebp + .cbp$] ; callback param
push dword [ebp + .cb$] ; callback
push dword [ebp + .wmem$] ; workmem
push ecx ; length
push ebx ; destination
push esi ; source
call aP_pack ;调用aP_pack,执行压缩
add esp, 24 ;平栈
cmp eax, -1 ;返回值为错
je .return_eax
mov [edi + 8], eax ; set header.packed_size,压缩后的大小
mov edx, eax ; edx = packed size
push eax
push ebx
call aP_crc32 ;压缩后的crc32
add esp, 8
mov [edi + 12], eax ; set header.packed_crc
lea eax, [edx + 24] ; eax = packed size + header size ,压缩文件大小,包括头
.return_eax:
mov [esp + .ret$], eax ; return unpacked length in eax存入栈上eax处,popad会弹到eax寄存器中
popad
ret
到这里又涉及到两个函数aP_crc32和aP_pack
ap_crc32提供了汇编源码,aP_pack没有提供源码,需要反汇编阅读
aPLib-1.01\src\32bit\crc32.asm
public aP_crc32 as '_aP_crc32'
; =============================================================
macro docrcM
{
mov ebx, 0x000000ff
and ebx, eax ;取eax的最低字节,即al
shr eax, 8 ;eax右移8位
xor eax, [edi+ebx*4] ;用al索引crc_tab取双字,和eax异或
}
macro docrcbyteM
{
xor al, [esi] ;取一字节,和al异或
inc esi
docrcM ;进行一轮计算
}
macro docrcdwordM
{
xor eax, [esi],取一个双字
add esi, 4
docrcM ;进行四轮计算
docrcM
docrcM
docrcM
}
; =============================================================
section '.text' code readable executable
aP_crc32:
; aP_crc32(const void *source, unsigned int length)
.ret$ equ 7*4
.src$ equ 8*4 + 4
.len$ equ 8*4 + 8
pushad
mov esi, [esp + .src$] ; esi -> buffer
mov ecx, [esp + .len$] ; ecx = length
mov edi, aP_crctab ; edi -> crctab crc表,有255个dword
sub eax, eax ; eax清0
test esi, esi ;无效,返回0
jz .c_exit
sub eax, 1 ; eax==-1
test ecx, ecx ;长度为0,返回-1
jz .c_done
.c_align_loop:
test esi, 3 ;是否对其到4字节边界
jz .c_aligned_now ;esi已经对齐到4字节边界
docrcbyteM ;否则先处理不对齐的部分
dec ecx ;已经处理1字节
jnz .c_align_loop ;是否还有未处理的字节
.c_aligned_now:
mov edx, ecx
and edx, 7 ;长度对8的余数
shr ecx, 3 ;长度对8的商
jz .c_LT_eight ;商为0,处理余数部分
.c_next_eight:
docrcdwordM;计算一个双字
docrcdwordM;计算下一个双字
dec ecx
jnz .c_next_eight
.c_LT_eight:
mov ecx, edx ;余数部分
test ecx, ecx
jz .c_done ;余数部分处理完
.c_last_loop:;余数部分未处理完
docrcbyteM
dec ecx
jnz .c_last_loop
.c_done:
not eax ;取反
.c_exit:
mov [esp + .ret$], eax ; return crc32 in eax
popad
ret
; =============================================================
section '.rdata' data readable
aP_crctab dd 000000000h, 077073096h, 0ee0e612ch, 0990951bah, 0076dc419h
dd 0706af48fh, 0e963a535h, 09e6495a3h, 00edb8832h, 079dcb8a4h
dd 0e0d5e91eh, 097d2d988h, 009b64c2bh, 07eb17cbdh, 0e7b82d07h
dd 090bf1d91h, 01db71064h, 06ab020f2h, 0f3b97148h, 084be41deh
dd 01adad47dh, 06ddde4ebh, 0f4d4b551h, 083d385c7h, 0136c9856h
dd 0646ba8c0h, 0fd62f97ah, 08a65c9ech, 014015c4fh, 063066cd9h
dd 0fa0f3d63h, 08d080df5h, 03b6e20c8h, 04c69105eh, 0d56041e4h
dd 0a2677172h, 03c03e4d1h, 04b04d447h, 0d20d85fdh, 0a50ab56bh
dd 035b5a8fah, 042b2986ch, 0dbbbc9d6h, 0acbcf940h, 032d86ce3h
dd 045df5c75h, 0dcd60dcfh, 0abd13d59h, 026d930ach, 051de003ah
dd 0c8d75180h, 0bfd06116h, 021b4f4b5h, 056b3c423h, 0cfba9599h
dd 0b8bda50fh, 02802b89eh, 05f058808h, 0c60cd9b2h, 0b10be924h
dd 02f6f7c87h, 058684c11h, 0c1611dabh, 0b6662d3dh, 076dc4190h
dd 001db7106h, 098d220bch, 0efd5102ah, 071b18589h, 006b6b51fh
dd 09fbfe4a5h, 0e8b8d433h, 07807c9a2h, 00f00f934h, 09609a88eh
dd 0e10e9818h, 07f6a0dbbh, 0086d3d2dh, 091646c97h, 0e6635c01h
dd 06b6b51f4h, 01c6c6162h, 0856530d8h, 0f262004eh, 06c0695edh
dd 01b01a57bh, 08208f4c1h, 0f50fc457h, 065b0d9c6h, 012b7e950h
dd 08bbeb8eah, 0fcb9887ch, 062dd1ddfh, 015da2d49h, 08cd37cf3h
dd 0fbd44c65h, 04db26158h, 03ab551ceh, 0a3bc0074h, 0d4bb30e2h
dd 04adfa541h, 03dd895d7h, 0a4d1c46dh, 0d3d6f4fbh, 04369e96ah
dd 0346ed9fch, 0ad678846h, 0da60b8d0h, 044042d73h, 033031de5h
dd 0aa0a4c5fh, 0dd0d7cc9h, 05005713ch, 0270241aah, 0be0b1010h
dd 0c90c2086h, 05768b525h, 0206f85b3h, 0b966d409h, 0ce61e49fh
dd 05edef90eh, 029d9c998h, 0b0d09822h, 0c7d7a8b4h, 059b33d17h
dd 02eb40d81h, 0b7bd5c3bh, 0c0ba6cadh, 0edb88320h, 09abfb3b6h
dd 003b6e20ch, 074b1d29ah, 0ead54739h, 09dd277afh, 004db2615h
dd 073dc1683h, 0e3630b12h, 094643b84h, 00d6d6a3eh, 07a6a5aa8h
dd 0e40ecf0bh, 09309ff9dh, 00a00ae27h, 07d079eb1h, 0f00f9344h
dd 08708a3d2h, 01e01f268h, 06906c2feh, 0f762575dh, 0806567cbh
dd 0196c3671h, 06e6b06e7h, 0fed41b76h, 089d32be0h, 010da7a5ah
dd 067dd4acch, 0f9b9df6fh, 08ebeeff9h, 017b7be43h, 060b08ed5h
dd 0d6d6a3e8h, 0a1d1937eh, 038d8c2c4h, 04fdff252h, 0d1bb67f1h
dd 0a6bc5767h, 03fb506ddh, 048b2364bh, 0d80d2bdah, 0af0a1b4ch
dd 036034af6h, 041047a60h, 0df60efc3h, 0a867df55h, 0316e8eefh
dd 04669be79h, 0cb61b38ch, 0bc66831ah, 0256fd2a0h, 05268e236h
dd 0cc0c7795h, 0bb0b4703h, 0220216b9h, 05505262fh, 0c5ba3bbeh
dd 0b2bd0b28h, 02bb45a92h, 05cb36a04h, 0c2d7ffa7h, 0b5d0cf31h
dd 02cd99e8bh, 05bdeae1dh, 09b64c2b0h, 0ec63f226h, 0756aa39ch
dd 0026d930ah, 09c0906a9h, 0eb0e363fh, 072076785h, 005005713h
dd 095bf4a82h, 0e2b87a14h, 07bb12baeh, 00cb61b38h, 092d28e9bh
dd 0e5d5be0dh, 07cdcefb7h, 00bdbdf21h, 086d3d2d4h, 0f1d4e242h
dd 068ddb3f8h, 01fda836eh, 081be16cdh, 0f6b9265bh, 06fb077e1h
dd 018b74777h, 088085ae6h, 0ff0f6a70h, 066063bcah, 011010b5ch
dd 08f659effh, 0f862ae69h, 0616bffd3h, 0166ccf45h, 0a00ae278h
dd 0d70dd2eeh, 04e048354h, 03903b3c2h, 0a7672661h, 0d06016f7h
dd 04969474dh, 03e6e77dbh, 0aed16a4ah, 0d9d65adch, 040df0b66h
dd 037d83bf0h, 0a9bcae53h, 0debb9ec5h, 047b2cf7fh, 030b5ffe9h
dd 0bdbdf21ch, 0cabac28ah, 053b39330h, 024b4a3a6h, 0bad03605h
dd 0cdd70693h, 054de5729h, 023d967bfh, 0b3667a2eh, 0c4614ab8h
dd 05d681b02h, 02a6f2b94h, 0b40bbe37h, 0c30c8ea1h, 05a05df1bh
dd 02d02ef8dh
因此整个算法很简单,就是进行length轮计算,伪代码如下
unsigned int aP_crc32(unsigned char * src,unsigned int lenght)
{
unsigned int a=-1;
unsigned char * pa=&a;
unsigned int b;
for(int i=0;i
pa[0]^=src[0];
b=crc_tab[pa[0]];
a=a>>8;
a^=b;
src++;
}
return ~a;
}
aP_pack下一篇再分析