簡介
檔案格式雖然可以直接從副檔名判斷,但對於沒有副檔名或副檔名錯誤的檔案,就無法得知實際的檔案型態。下面的範例使用 Java 讀取檔案標頭來判斷:先將檔案開頭的 bytes 轉換成 hex(十六進制)字串,再與已知的檔案標頭特徵碼比對,藉此判斷檔案的實際格式。
範例程式碼
/**
 * Detects a file's real type from its leading "magic number" bytes instead of
 * trusting the file extension.
 *
 * <p>Requires {@code java.io.FileInputStream} and {@code java.io.IOException}
 * to be imported by the enclosing source file.
 */
public class test {

    /** Lowercase hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Number of leading bytes inspected; the longest signature checked is 4 bytes. */
    private static final int HEADER_LENGTH = 4;

    /**
     * Converts a byte array to a lowercase hexadecimal string, two digits per byte.
     *
     * @param src the bytes to convert
     * @return the hex string, or {@code null} when {@code src} is {@code null} or empty
     */
    public static String bytesToHexString(byte[] src) {
        if (src == null || src.length == 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder(src.length * 2);
        for (byte value : src) {
            // Mask each nibble so negative bytes do not sign-extend.
            hex.append(HEX_DIGITS[(value >> 4) & 0x0F]);
            hex.append(HEX_DIGITS[value & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Reads up to the first four bytes of the stream and maps them to a file type.
     *
     * <p>A signature only matches at the very start of the data. Only the bytes
     * actually read are considered, so a file shorter than four bytes is never
     * padded with zeros.
     *
     * @param is the stream to read from, positioned at the start of the file;
     *           it is not closed by this method
     * @return {@code "jpg"}, {@code "png"}, {@code "gif"}, {@code "tif"},
     *         {@code "bmp"} or {@code "mp3"} for a recognized signature; otherwise
     *         the uppercase hex of the bytes read (an empty string if none were read)
     */
    public static String getTypeByStream(FileInputStream is) {
        byte[] header = new byte[HEADER_LENGTH];
        int total = 0;
        try {
            // A single read() may return fewer bytes than requested, so loop until
            // the buffer is full or the end of the stream is reached.
            while (total < header.length) {
                int count = is.read(header, total, header.length - total);
                if (count < 0) {
                    break;
                }
                total += count;
            }
        } catch (IOException e) {
            // Best effort, as before: report the failure and classify whatever
            // was read successfully instead of an all-zero buffer.
            System.err.println("Failed to read file header: " + e);
        }
        if (total == 0) {
            return "";
        }

        byte[] actual = new byte[total];
        System.arraycopy(header, 0, actual, 0, total);
        String type = bytesToHexString(actual).toUpperCase();

        if (type.startsWith("FFD8FF")) {
            return "jpg";
        } else if (type.startsWith("89504E47")) {
            return "png";
        } else if (type.startsWith("47494638")) {
            return "gif";
        } else if (type.startsWith("49492A00") || type.startsWith("4D4D002A")) {
            // "II*\0" is little-endian TIFF, "MM\0*" is big-endian TIFF.
            return "tif";
        } else if (type.startsWith("424D")) {
            return "bmp";
        } else if (type.startsWith("494433")) {
            return "mp3";
        }
        return type;
    }

    /**
     * Demo entry point: prints the detected type of the file named "2NE1" in the
     * current working directory.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or closed
     */
    public static void main(String[] args) throws Exception {
        String src = "2NE1";
        // try-with-resources guarantees the file handle is released.
        try (FileInputStream is = new FileInputStream(src)) {
            String type = getTypeByStream(is);
            System.out.println(type);
        }
    }
}
檔案編碼列表
'jp2'=>'0000000C6A502020',
'3gp'=>'0000002066747970',
'3gp5'=>'0000001866747970',
'm4a'=>'00000020667479704D3441',
'ico'=>'00000100',
'spl'=>'00000100',
'vob'=>'000001BA',
'cur'=>'00000200',
'wb2'=>'00000200',
'wk1'=>'0000020006040600',
'wk3'=>'00001A0000100400',
'wk4'=>'00001A0002100400',
'wk5'=>'00001A0002100400',
'123'=>'00001A00051004',
'qxd'=>'00004D4D585052',
'mdf'=>'010F0000',
'tr1'=>'0110',
'rgb'=>'01DA01010003',
'drw'=>'07',
'dss'=>'02647373',
'dat'=>'A90D000000000000',
'db3'=>'03',
'qph'=>'03000000',
'adx'=>'80000020031204',
'db4'=>'04',
'n'=>'FFFE0000',
'a'=>'FFFE0000',
'skf'=>'07534B46',
'dtd'=>'0764743264647464',
'db'=>'D0CF11E0A1B11AE1',
'pcx'=>'0A050101',
'mp'=>'0CED',
'doc'=>'D0CF11E0A1B11AE1',
'nri'=>'0E4E65726F49534F',
'wks'=>'FF00020004040554',
'pf'=>'1100000053434341',
'ntf'=>'4E49544630',
'nsf'=>'4E45534D1A01',
'arc'=>'41724301',
'pak'=>'5041434B',
'eth'=>'1A350100',
'mkv'=>'1A45DFA393428288',
'ws'=>'1D7D',
'gz'=>'1F8B08',
'tar.z'=>'1FA0',
'ain'=>'2112',
'lib'=>'213C617263683E0A',
'msi'=>'D0CF11E0A1B11AE1',
'vmdk'=>'4B444D',
'dsp'=>'23204D6963726F73',
'amr'=>'2321414D52',
'hdr'=>'49536328',
'sav'=>'24464C3240282329',
'eps'=>'C5D0D3C6',
'pdf'=>'25504446',
'fdf'=>'25504446',
'hqx'=>'2854686973206669',
'log'=>'2A2A2A2020496E73',
'ivr'=>'2E524543',
'rm'=>'2E524D46',
'rmvb'=>'2E524D46',
'ra'=>'2E7261FD00',
'au'=>'646E732E',
'cat'=>'30',
'evt'=>'300000004C664C65',
'asf'=>'3026B2758E66CF11',
'wma'=>'3026B2758E66CF11',
'wmv'=>'3026B2758E66CF11',
'wri'=>'BE000000AB',
'7z'=>'377ABCAF271C',
'psd'=>'38425053',
'sle'=>'414376',
'asx'=>'3C',
'xdr'=>'3C',
'dci'=>'3C21646F63747970',
'manifest'=>'3C3F786D6C2076657273696F6E3D',
'xml'=>'3C3F786D6C2076657273696F6E3D22312E30223F3E',
'msc'=>'D0CF11E0A1B11AE1',
'fm'=>'3C4D616B65724669',
'mif'=>'56657273696F6E20',
'gid'=>'4C4E0200',
'hlp'=>'4C4E0200',
'dwg'=>'41433130',
'syw'=>'414D594F',
'abi'=>'414F4C494E444558',
'aby'=>'414F4C4442',
'bag'=>'414F4C2046656564',
'idx'=>'5000000020000000',
'ind'=>'414F4C494458',
'pfc'=>'414F4C564D313030',
'org'=>'414F4C564D313030',
'vcf'=>'424547494E3A5643',
'bin'=>'424C4932323351',
'bmp'=>'424D',
'dib'=>'424D',
'prc'=>'424F4F4B4D4F4249',
'bz2'=>'425A68',
'tar.bz2'=>'425A68',
'tbz2'=>'425A68',
'tb2'=>'425A68',
'rtd'=>'43232B44A4434DA5',
'cbd'=>'434246494C45',
'iso'=>'4344303031',
'clb'=>'434F4D2B',
'cpt'=>'43505446494C45',
'cru'=>'43525553482076',
'swf'=>'465753',
'ctf'=>'436174616C6F6720',
'dms'=>'444D5321',
'adf'=>'5245564E554D3A2C',
'dvr'=>'445644',
'ifo'=>'445644',
'cdr'=>'52494646',
'vcd'=>'454E545259564344',
'mdi'=>'4550',
'e01'=>'4C5646090D0AFF00',
'evtx'=>'456C6646696C6500',
'qbb'=>'458600000600',
'cpe'=>'464158434F564552',
'flv'=>'464C56',
'aiff'=>'464F524D00',
'eml'=>'582D',
'gif'=>'47494638',
'pat'=>'47504154',
'gx2'=>'475832',
'sh3'=>'4848474231',
'tif'=>'4D4D002B',
'tiff'=>'4D4D002B',
'mp3'=>'494433',
'koz'=>'49443303000000',
'crw'=>'49491A0000004845',
'cab'=>'4D534346',
'lit'=>'49544F4C49544C53',
'chi'=>'49545346',
'chm'=>'49545346',
'jar'=>'5F27A889',
'jg'=>'4A47040E000000',
'kgb'=>'4B47425F61726368',
'shd'=>'68490000',
'lnk'=>'4C00000001140200',
'obj'=>'80',
'pdb'=>'ACED000573720012',
'mar'=>'4D41723000',
'dmp'=>'504147454455',
'hdmp'=>'4D444D5093A7',
'mls'=>'4D563243',
'mmf'=>'4D4D4D440000',
'nvram'=>'4D52564E',
'ppz'=>'4D534346',
'snp'=>'4D534346',
'tlb'=>'4D53465402000100',
'dvf'=>'4D535F564F494345',
'msv'=>'4D535F564F494345',
'mid'=>'4D546864',
'midi'=>'4D546864',
'dsn'=>'4D56',
'com'=>'EB',
'dll'=>'4D5A',
'drv'=>'4D5A',
'exe'=>'4D5A',
'pif'=>'4D5A',
'qts'=>'4D5A',
'qtx'=>'4D5A',
'sys'=>'FFFFFFFF',
'acm'=>'4D5A',
'ax'=>'4D5A900003000000',
'cpl'=>'DCDC',
'fon'=>'4D5A',
'ocx'=>'4D5A',
'olb'=>'4D5A',
'scr'=>'4D5A',
'vbx'=>'4D5A',
'vxd'=>'4D5A',
'386'=>'4D5A',
'api'=>'4D5A900003000000',
'flt'=>'76323030332E3130',
'zap'=>'4D5A90000300000004000000FFFF',
'sln'=>'4D6963726F736F66742056697375616C',
'jnt'=>'4E422A00',
'jtp'=>'4E422A00',
'cod'=>'4E616D653A20',
'dbf'=>'4F504C4461746162',
'oga'=>'4F67675300020000',
'ogg'=>'4F67675300020000',
'ogv'=>'4F67675300020000',
'ogx'=>'4F67675300020000',
'dw4'=>'4F7B',
'pgm'=>'50350A',
'pax'=>'504158',
'pgd'=>'504750644D41494E',
'img'=>'EB3C902A',
'zip'=>'504B0304140000',
'docx'=>'504B030414000600',
'pptx'=>'504B030414000600',
'xlsx'=>'504B030414000600',
'kwd'=>'504B0304',
'odt'=>'504B0304',
'odp'=>'504B0304',
'ott'=>'504B0304',
'sxc'=>'504B0304',
'sxd'=>'504B0304',
'sxi'=>'504B0304',
'sxw'=>'504B0304',
'wmz'=>'504B0304',
'xpi'=>'504B0304',
'xps'=>'504B0304',
'xpt'=>'5850434F4D0A5479',
'grp'=>'504D4343',
'qemu'=>'514649',
'abd'=>'5157205665722E20',
'qsd'=>'5157205665722E20',
'reg'=>'FFFE',
'sud'=>'52454745444954',
'ani'=>'52494646',
'cmx'=>'52494646',
'ds4'=>'52494646',
'4xm'=>'52494646',
'avi'=>'52494646',
'cda'=>'52494646',
'qcp'=>'52494646',
'rmi'=>'52494646',
'wav'=>'52494646',
'cap'=>'58435000',
'rar'=>'526172211A0700',
'ast'=>'5343486C',
'shw'=>'53484F57',
'cpi'=>'FF464F4E54',
'sit'=>'5374756666497420',
'sdr'=>'534D415254445257',
'cnv'=>'53514C4F434F4E56',
'cal'=>'B5A2B0B3B3B0A5B5',
'info'=>'E310000100000000',
'uce'=>'55434558',
'ufa'=>'554641C6D2C1',
'pch'=>'564350434830',
'ctl'=>'56455253494F4E20',
'ws2'=>'575332303030',
'lwp'=>'576F726450726F',
'bdr'=>'5854',
'zoo'=>'5A4F4F20',
'ecf'=>'5B47656E6572616C',
'vcw'=>'5B4D535643',
'dun'=>'5B50686F6E655D',
'sam'=>'5B7665725D',
'cpx'=>'5B57696E646F7773',
'cfg'=>'5B666C7473696D2E',
'cas'=>'5F434153455F',
'cbk'=>'5F434153455F',
'arj'=>'60EA',
'vhd'=>'636F6E6563746978',
'csh'=>'6375736800000002',
'p10'=>'64000000',
'dex'=>'6465780A30303900',
'dsw'=>'64737766696C65',
'flac'=>'664C614300000022',
'dbb'=>'6C33336C',
'acd'=>'72696666',
'ram'=>'727473703A2F2F',
'dmg'=>'78',
'lgc'=>'7B0D0A6F20',
'lgd'=>'7B0D0A6F20',
'pwi'=>'7B5C707769',
'rtf'=>'7B5C72746631',
'psp'=>'7E424B00',
'wab'=>'9CCBCB8D1375D211',
'wpf'=>'81CDAB',
'png'=>'89504E470D0A1A0A',
'aw'=>'8A0109000000E108',
'hap'=>'91334846',
'skr'=>'9501',
'gpg'=>'99',
'pkr'=>'9901',
'qdf'=>'AC9EBD8F0000',
'pwl'=>'E3828596',
'dcx'=>'B168DE3A',
'tib'=>'B46E6844',
'acs'=>'C3ABCDAB',
'lbk'=>'C8007900',
'class'=>'CAFEBABE',
'dbx'=>'CFAD12FE',
'dot'=>'D0CF11E0A1B11AE1',
'pps'=>'D0CF11E0A1B11AE1',
'ppt'=>'D0CF11E0A1B11AE1',
'xla'=>'D0CF11E0A1B11AE1',
'xls'=>'D0CF11E0A1B11AE1',
'wiz'=>'D0CF11E0A1B11AE1',
'ac_'=>'D0CF11E0A1B11AE1',
'adp'=>'D0CF11E0A1B11AE1',
'apr'=>'D0CF11E0A1B11AE1',
'mtw'=>'D0CF11E0A1B11AE1',
'opt'=>'D0CF11E0A1B11AE1',
'pub'=>'D0CF11E0A1B11AE1',
'rvt'=>'D0CF11E0A1B11AE1',
'sou'=>'D0CF11E0A1B11AE1',
'spo'=>'D0CF11E0A1B11AE1',
'vsd'=>'D0CF11E0A1B11AE1',
'wps'=>'D0CF11E0A1B11AE1',
'ftr'=>'D20A0000',
'arl'=>'D42A',
'aut'=>'D42A',
'wmf'=>'D7CDC69A',
'efx'=>'DCFE',
'one'=>'E4525C7B8CD8A74D',
'rpm'=>'EDABEEDB',
'gho'=>'FEEF',
'ghs'=>'FEEF',
'wp'=>'FF575043',
'wpd'=>'FF575043',
'wpg'=>'FF575043',
'wpp'=>'FF575043',
'wp5'=>'FF575043',
'wp6'=>'FF575043',
'jfif'=>'FFD8FF',
'jpe'=>'FFD8FF',
'jpeg'=>'FFD8FF',
'jpg'=>'FFD8FF',
'mof'=>'FFFE23006C006900',
'ipa'=>'504B03040A',
往後判斷檔案格式時,就可以不必依賴不可靠的副檔名,只需要讀取檔案開頭的 hex 資料即可處理。
沒有留言:
張貼留言