(转)复合文档格式分析

时间:2022-06-22 17:42:19
标 题: 【原创】复合文档格式分析
作 者: kanghtta
时 间: 2009-12-27,01:25:11
链 接: http://bbs.pediy.com/showthread.php?t=103868

参加了看雪10周年活动。见了很多牛牛,但苦于近来工作忙,经常潜水中,,不能白拿kanxue的u盘,也来回报下,重新活动下。。
如今,复合文档格式被广泛使用,虽然格式早就被人研究明白,也有相应的文档。
但网上大多都是些文字介绍,因此,想写个格式分析的东西来加深对复合文档的理解,
此小程序有如下功能:
1:打印输出文档头 512字节
2:分析msat、sat、ssat(short sat)
3:分析各个stream使用的sat和ssat的扇区号。
可在windows 和linux 下使用,更多功能我会逐步扩充好加上来。。。
写此程序的目的只是为了巩固对复合文档的学习,具体的格式就不写了,
附件里是复合文档格式的说明文档(英文),英文好的可以直接看。

有用得着的朋友注意下:由于只是做了解析,没有建立红黑树,也没有考虑msat超过109项(即文件头之外还有额外msat扇区)的情况,需要的可以在此基础上扩充,应该不难。
代码:
#include <math.h>
#include <tchar.h>

#include <cassert>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <string>
#include <vector>

#include "ComDoc.h"

using namespace std;

/* File-level state shared by the functions below (mutable globals, not constants). */

/* Size in bytes of a regular sector; assigned by DumpDocHeader from the header's _uSectorShift. */
int SectorSize = 0 ;
/* Size in bytes of a short (mini) sector; assigned by DumpDocHeader from the header's _uMiniSectorShift. */
int miniSectorSize = 0 ;
ULONG MaxMiniStreamSize
= 0 ; /* Copied by main from the header's _ulMiniSectorCutoff. ProcessDirEntry follows the SSAT for
         non-root entries whose _ulSize is below this value, and the SAT otherwise. */
/* Forward declarations of the functions defined after main. */
bool DumpDocHeader(PDocHeader pHeader);
bool IfReadFile(ifstream & inStream,unsigned char * buf,unsigned int iReadOffest,size_t size);
int GetOffestFremSid(SECT sid);
bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector < int > & slist,vector < int > & sslist,vector < vector < int > >& FatOfDirEntry, int );

int main( int argc, char * argv[])
{
cout
<< " Enter the name of the input file :\n " ;
string inputFileName;
getline(cin,inputFileName);

BYTE
* lpHeaderBuf = new BYTE[ 512 ];

memset(lpHeaderBuf,
0 , 512 );

ifstream inStream;
inStream.open(inputFileName.data(),ios::binary
| ios:: in );
assert(inStream.is_open());
inStream.read((
char * )lpHeaderBuf, 512 );
PDocHeader pHeaderSec
= (PDocHeader)lpHeaderBuf;

cout
<< " open the " << inputFileName << " file is successful\n " << endl;
DumpDocHeader(pHeaderSec);
MaxMiniStreamSize
= pHeaderSec -> _ulMiniSectorCutoff;
/* ******************************************************************** */
/* 处理msat */
/* ********************************************************************** */
unsigned
long iMastSize = 0 ;
vector
< int > vMastList; /* * 用于存储sat链表 */
if ((pHeaderSec -> _sectDifStart == ENDOFCHAIN) && (pHeaderSec -> _csectDif == 0 ))
{
for (iMastSize = 1 ;iMastSize <= 109 ;iMastSize ++ )
{
if ((( pHeaderSec -> _sectFat[iMastSize - 1 ] ) == 0xFFFFFFFF ))
{
break ;
}
else
{

BYTE
* SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid((pHeaderSec
-> _sectFat[iMastSize - 1 ] )),SectorSize);
int * pListOfMast = ( int * )SecBuf;
int i = 0 ;
while ( pListOfMast[i] != FREESECT )
{
vMastList.push_back(pListOfMast[i]);
// cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

cout
<< " MAST[ " << i << " ] == " ;
cout
<< vMastList[i] << ' \t ' ;
if (vMastList[vMastList.size() - 1 ] == ENDOFCHAIN)
{
cout
<< endl;
}
i
++ ;
}

delete []SecBuf;
}

}
}
else
{
/* * Mast 大于 109个扇区的情况 */
}

/* ********************************************************************** */
/* 处理ssat */
/* ********************************************************************** */

/*
* 读取存取ssat短链分配表占用的扇区链,
*/
vector
< int > vSsatFat;
vSsatFat.push_back(pHeaderSec
-> _sectMiniFatStart);
int index = vSsatFat[ 0 ];
while (vMastList[index] != ENDOFCHAIN )
{
index
= vMastList[index];
vSsatFat.push_back(index);

}

vSsatFat.push_back(vMastList[index]);
int i = 0 ; /* * 循环计数器 */
for (i; i < vSsatFat.size();i ++ )
{
cout
<< " S-FAT[ " << i << " ] == " ;
cout
<< hex << vSsatFat[i] << ' \t ' ;
}
cout
<< endl;

/*
* 读取ssat链,即用于记录short stream 的链
* pHeaderSec->_csectMiniFat 指出short stream 占几个扇区
*/
vector
< int > vSsatList;

for (i = 1 ; i <= pHeaderSec -> _csectMiniFat; i ++ )
{

BYTE
* SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid(vSsatFat[i
- 1 ]),SectorSize);
int * pListOfMast = ( int * )SecBuf;
int i = 0 ;
while ( pListOfMast[i] != FREESECT )
{
vSsatList.push_back(pListOfMast[i]);
// cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

cout
<< " SSAT[ " << i << " ] == " ;
cout
<< vSsatList[i] << ' \t ' ;
if (vSsatList[vSsatList.size() - 1 ] == ENDOFCHAIN)
{
cout
<< endl;
}
i
++ ;
}

delete []SecBuf;

}

/* ********************************************************************** */
/* Process Directory */
/* ********************************************************************** */

/*
* 读取存取directory 的扇区sid链
*/
vector
< int > vDirFat;
vDirFat.push_back(pHeaderSec
-> _sectDirStart);
index
= vDirFat[ 0 ];
while (vMastList[index] != ENDOFCHAIN )
{
index
= vMastList[index];
vDirFat.push_back(index);

}

vDirFat.push_back(vMastList[index]);

for (i = 0 ; i < vDirFat.size();i ++ )
{
cout
<< " Director-FAT[ " << i << " ] == " ;
cout
<< hex << vDirFat[i] << ' \t ' ;
}
cout
<< endl;
cout
<< vDirFat.size() << endl;

/*
*分析并处理directory目录
*/
vector
< DirectoryEntry > lDirList;
for (i = 0 ; i < (vDirFat.size() - 1 ) ;i ++ )
{
BYTE
* SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid(vDirFat[i]),SectorSize);
PDirectoryEntry pDirEntry
= PDirectoryEntry(SecBuf);
for ( int j = 0 ;j < 4 ;j ++ )
{
DirectoryEntry tempDirEntry
= pDirEntry[j];
lDirList.push_back(tempDirEntry);

}



delete []SecBuf;
}

vector
< vector < int > > vFatOfDirEntry;
/*
*processing DirectoryEntry list
*/
for (i = 0 ;i < lDirList.size();i ++ )
{
ProcessDirEntry(
& lDirList[i],vMastList,vSsatList,vFatOfDirEntry,i) ;
}



delete []lpHeaderBuf;

inStream.close();
return 0 ;
}

bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector < int > & slist,vector < int > & sslist,vector < vector < int > >& vFatOfDirEntry, int i)
{
string DirName;
unsigned
int index;
vector
< int > FatOfDirEntry;
if (pDirEntry -> _cb == 0 )
{
return false ;
}
else
{
char buf[ 256 ] = { 0 , 0 };
wcstombs(buf,(wchar_t
* )pDirEntry -> _ab,(size_t)pDirEntry -> _cb);
DirName
= buf;

/* ********************************************************************** */
/* 是否是短流 */
/* ********************************************************************** */
if ((pDirEntry -> _ulSize < MaxMiniStreamSize) && (pDirEntry -> _mse != STGTY_ROOT))
{
FatOfDirEntry.push_back(pDirEntry
-> _sectStart);
index
= pDirEntry -> _sectStart;
while (sslist[index] != ENDOFCHAIN)
{
index
= sslist[index];
FatOfDirEntry.push_back(index);
}
FatOfDirEntry.push_back(sslist[index]);

}
else
{
FatOfDirEntry.push_back(pDirEntry
-> _sectStart);
index
= pDirEntry -> _sectStart;
while (slist[index] != ENDOFCHAIN)
{
index
= slist[index];
FatOfDirEntry.push_back(index);
}
FatOfDirEntry.push_back(slist[index]);
}
cout
<< DirName << ' \t ' ;
for ( int j = 0 ; j < FatOfDirEntry.size();j ++ )
{
cout
<< " \" \" <<DirName<<-FAT[ " << j << " ] == " ;
cout
<< hex << FatOfDirEntry[j] << ' \t ' ;
}
cout
<< endl;
vFatOfDirEntry.push_back(FatOfDirEntry);
FatOfDirEntry.clear();

}

return true ;

}



/*
 * Reads exactly `size` bytes starting at absolute file offset `iReadOffest` into `buf`.
 *
 * Any error state left on the stream by an earlier failed read is cleared first, so
 * one short read does not make every later call fail.
 *
 * Returns true only when the seek succeeded and all `size` bytes were read; on false
 * the contents of `buf` beyond the bytes actually read are unspecified.
 */
bool IfReadFile(std::ifstream & inStream, unsigned char * buf, unsigned int iReadOffest, std::size_t size)
{
    if (buf == 0 && size != 0)
    {
        return false;
    }

    inStream.clear();
    inStream.seekg(static_cast<std::streamoff>(iReadOffest), std::ios::beg);
    if (!inStream)
    {
        return false; /* stream not open or offset not seekable */
    }

    inStream.read(reinterpret_cast<char *>(buf), static_cast<std::streamsize>(size));
    return static_cast<std::size_t>(inStream.gcount()) == size;
}

/*
 * Converts a sector id into the byte offset of that sector within the file.
 *
 * The header occupies the whole first sector-sized slot of the file, so sector 0
 * starts at offset SectorSize. For 512-byte sectors this equals the former
 * "sid * SectorSize + 512"; for 4096-byte sectors the fixed 512 was wrong.
 * SectorSize must already have been set (DumpDocHeader assigns it).
 */
int GetOffestFremSid(SECT sid)
{
    return (sid + 1) * SectorSize;
}


/*
 * Returns 2^shift for a size exponent taken from the header, or 0 when the
 * exponent is too large to be represented in an int.
 */
static int DocSizeFromShift(unsigned int shift)
{
    return (shift < 31) ? (1 << shift) : 0;
}

/*
 * Prints the fields of the compound-document header to stdout and, as a side effect,
 * sets the globals SectorSize and miniSectorSize from the header's size exponents.
 *
 * Returns false when pHeader is null or a size exponent is out of range (the
 * corresponding global is then 0), true otherwise.
 */
bool DumpDocHeader(PDocHeader pHeader)
{
    if (!pHeader)
    {
        return false;
    }

    /* File signature, one hex byte at a time. */
    std::cout << " \t The comdoc flag is\t{ ";
    for (int i = 0; i < 8; i++)
    {
        std::cout << std::hex << static_cast<int>(pHeader->_abSig[i]) << " ";
        if (i != 7)
        {
            std::cout << ',';
        }
    }
    std::cout << '}' << std::endl;

    if (pHeader->_uByteOrder == 0xFFFE)
    {
        std::cout << " \t The file Byte order is Little-Endian " << std::endl;
    }

    /* Everything below is printed in decimal. */
    std::cout.setf(std::ios::dec, std::ios::basefield);

    /* Sizes are stored as power-of-two exponents; a shift is exact, unlike pow(). */
    SectorSize = DocSizeFromShift(static_cast<unsigned int>(pHeader->_uSectorShift));
    std::cout << " \t Size of a Sector in the compound document file is " << SectorSize << std::endl;

    miniSectorSize = DocSizeFromShift(static_cast<unsigned int>(pHeader->_uMiniSectorShift));
    std::cout << " \t Size of a short-sector in the short-stream container stream is " << miniSectorSize << std::endl;

    std::cout << " \t Total number of sectors used for the sector allocation table is " << (DWORD)pHeader->_csectFat << std::endl;
    std::cout << " \t SecID of first sector of the directory stream is " << (ULONG)pHeader->_sectDirStart << std::endl;
    std::cout << " \t Minimum size of a standard stream is " << (ULONG)pHeader->_ulMiniSectorCutoff << std::endl;
    std::cout << " \t SecID of first sector of the short-sector allocation table is " << (ULONG)pHeader->_sectMiniFatStart << std::endl;
    std::cout << " \t Total number of sectors used for the short-sector allocation table is " << (ULONG)pHeader->_csectMiniFat << std::endl;
    std::cout << " \t SecID of first sector of the master sector allocation table is " << pHeader->_sectDifStart << std::endl;
    std::cout << " \t Total number of sectors used for the master sector allocation table is " << (ULONG)pHeader->_csectDif << std::endl;
    std::cout << " \t First part of the master sector allocation table containing 109 SecIDs is " << std::endl;

    /* List the used MSAT slots, 20 per output line; the first free slot ends the list. */
    std::cout << " \t {\t ";
    for (int slot = 0; slot < 109; slot++)
    {
        if (pHeader->_sectFat[slot] == 0xFFFFFFFF)
        {
            break;
        }
        std::cout << pHeader->_sectFat[slot] << '\t';
        if ((slot + 1) % 20 == 0)
        {
            std::cout << std::endl;
        }
    }
    std::cout << '}' << std::endl;

    return SectorSize != 0 && miniSectorSize != 0;
}