// com.mycomm.itool.compress.huffman.HuffmanUtil Maven / Gradle / Ivy
// The newest version!
/*
* Copyright 2020 jw362j.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mycomm.itool.compress.huffman;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.PriorityQueue;
/**
 * Huffman-coding based file compressor and decompressor.
 *
 * Compressed layout (integers are written with DataOutputStream):
 *   - int: number of distinct byte values in the input ("kinds");
 *   - if kinds is 1: the byte value followed by an int holding its count;
 *   - otherwise: kinds pairs of (byte value, int frequency) in the order
 *     produced by sorting the TmpNode table, then an int holding the
 *     original length, then the Huffman bit stream packed most significant
 *     bit first and zero padded to a whole byte.
 *
 * Frequencies and the length are stored as int, so inputs whose size does
 * not fit in an int are not supported by this format.
 *
 * I/O failures are reported with printStackTrace() and are not rethrown,
 * so callers that relied on the methods not throwing IOException keep working.
 */
public class HuffmanUtil {

    /** Number of distinct values a byte can take. */
    private static final int BYTE_VALUES = 256;

    /**
     * Compresses {@code inputFile} into {@code outputFile}.
     *
     * The input is read twice: once to count byte frequencies and once to
     * emit the encoded bit stream. An empty input produces a header with
     * zero kinds and zero length instead of failing.
     *
     * @param inputFile  file to compress
     * @param outputFile destination; created or truncated
     */
    public void compress(File inputFile, File outputFile) {
        // Frequency table, one slot per possible byte value.
        TmpNode[] frequencies = new TmpNode[BYTE_VALUES];
        for (int i = 0; i < BYTE_VALUES; i++) {
            frequencies[i] = new TmpNode();
            frequencies[i].weight = 0;
            frequencies[i].Byte = (byte) i;
        }
        try {
            // First pass: count occurrences and measure the file length.
            int fileLength = 0;
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(inputFile))) {
                int value;
                while ((value = in.read()) != -1) {
                    frequencies[value].weight++;
                    fileLength++;
                }
            }

            // Sort so that unused byte values (weight 0) end up last, then
            // seed the queue with one leaf per byte value that occurs.
            Arrays.sort(frequencies);
            PriorityQueue<HufTree> queue = new PriorityQueue<HufTree>(12, new Compare());
            int kinds = 0;
            while (kinds < BYTE_VALUES && frequencies[kinds].weight != 0) {
                HufTree leaf = new HufTree();
                leaf.Byte = frequencies[kinds].Byte;
                leaf.weight = frequencies[kinds].weight;
                queue.add(leaf);
                kinds++;
            }

            try (DataOutputStream out = new DataOutputStream(
                    new BufferedOutputStream(new FileOutputStream(outputFile)))) {
                // Header: number of kinds followed by the (value, frequency) table.
                out.writeInt(kinds);
                for (int i = 0; i < kinds; i++) {
                    out.writeByte(frequencies[i].Byte);
                    out.writeInt(frequencies[i].weight);
                }
                if (kinds == 1) {
                    // A single symbol needs no tree and no bit stream:
                    // its frequency already is the file length.
                    return;
                }
                out.writeInt(fileLength);
                if (kinds == 0) {
                    // Empty input: nothing to encode.
                    return;
                }

                // Build the tree and derive the code of every byte value.
                createTree(queue);
                HashMap<Byte, String> codes = new HashMap<Byte, String>();
                hufCode(queue.peek(), "", codes);

                // Second pass: pack the codes into bytes, MSB first.
                try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(inputFile))) {
                    int bitBuffer = 0;
                    int bitCount = 0;
                    int value;
                    while ((value = in.read()) != -1) {
                        String code = codes.get((byte) value);
                        if (code == null) {
                            throw new IOException("Input changed while compressing: unexpected byte " + value);
                        }
                        for (int k = 0; k < code.length(); k++) {
                            bitBuffer <<= 1;
                            if (code.charAt(k) == '1') {
                                bitBuffer |= 1;
                            }
                            if (++bitCount == 8) {
                                out.writeByte(bitBuffer);
                                bitBuffer = 0;
                                bitCount = 0;
                            }
                        }
                    }
                    // Pad the final partial byte with zero bits on the right.
                    if (bitCount > 0) {
                        out.writeByte(bitBuffer << (8 - bitCount));
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Restores a file produced by {@link #compress(File, File)}.
     *
     * @param inputFile  compressed file
     * @param outputFile destination for the restored bytes; created or truncated
     */
    public void extract(File inputFile, File outputFile) {
        try (DataInputStream in = new DataInputStream(
                     new BufferedInputStream(new FileInputStream(inputFile)));
             BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile))) {
            int kinds = in.readInt();
            if (kinds < 0 || kinds > BYTE_VALUES) {
                throw new IOException("Corrupt Huffman header: symbol count " + kinds);
            }
            if (kinds == 1) {
                // Only one distinct byte: repeat it the recorded number of times.
                byte value = in.readByte();
                int length = in.readInt();
                for (int k = 0; k < length; k++) {
                    out.write(value);
                }
                return;
            }

            // Rebuild the tree from the table, inserting leaves in the same
            // order compress() did so that both sides derive the same codes.
            PriorityQueue<HufTree> queue = new PriorityQueue<HufTree>(12, new Compare());
            for (int i = 0; i < kinds; i++) {
                HufTree leaf = new HufTree();
                leaf.Byte = in.readByte();
                leaf.weight = in.readInt();
                queue.add(leaf);
            }
            createTree(queue);
            int length = in.readInt();
            HufTree root = queue.peek();
            if (root == null && length > 0) {
                throw new IOException("Corrupt Huffman header: no symbols for " + length + " bytes");
            }

            // Walk the tree bit by bit; 1 goes right, 0 goes left.
            HufTree node = root;
            int written = 0;
            while (written < length) {
                int bits = in.readUnsignedByte();
                for (int k = 0; k < 8 && written < length; k++) {
                    node = (bits & 0x80) != 0 ? node.rchild : node.lchild;
                    if (node == null) {
                        throw new IOException("Corrupt Huffman bit stream");
                    }
                    if (node.lchild == null && node.rchild == null) {
                        out.write(node.Byte);
                        written++;
                        node = root;
                    }
                    bits <<= 1;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Collapses the queue into a single Huffman tree by repeatedly merging
     * the two lightest nodes; afterwards the queue holds only the root
     * (or is unchanged if it held fewer than two nodes).
     *
     * @param queue nodes ordered by ascending weight
     */
    public void createTree(PriorityQueue<HufTree> queue) {
        while (queue.size() > 1) {
            HufTree min1 = queue.poll();
            HufTree min2 = queue.poll();
            HufTree parent = new HufTree();
            parent.weight = min1.weight + min2.weight;
            parent.lchild = min1;
            parent.rchild = min2;
            queue.add(parent);
        }
    }

    /**
     * Assigns a code to every leaf below {@code root}: '0' is appended for a
     * left edge and '1' for a right edge. Each leaf's code is stored on the
     * node and recorded in {@code map} under the leaf's byte value.
     *
     * @param root subtree to label; ignored when null
     * @param s    code prefix accumulated on the path down to {@code root}
     * @param map  receives byte value to code mappings
     */
    public void hufCode(HufTree root, String s, HashMap<Byte, String> map) {
        if (root == null) {
            return;
        }
        if (root.lchild == null && root.rchild == null) {
            root.code = s;
            map.put(root.Byte, root.code);
            return;
        }
        if (root.lchild != null) {
            hufCode(root.lchild, s + '0', map);
        }
        if (root.rchild != null) {
            hufCode(root.rchild, s + '1', map);
        }
    }
}
/**
 * Node of the Huffman tree. A node whose two children are both null is a
 * leaf that stands for one byte value.
 */
class HufTree {
    /** Left subtree, or null. */
    public HufTree lchild;
    /** Right subtree, or null. */
    public HufTree rchild;
    /** The 8-bit value this node stands for. */
    public byte Byte;
    /** Number of times the byte occurs in the file. */
    public int weight;
    /** Huffman code assigned to this node, as a string of '0'/'1' characters. */
    public String code;
}
/**
 * Frequency counter for a single byte value. The natural ordering is by
 * descending weight, so sorting an array of these puts the most frequent
 * byte values first and the unused ones (weight 0) last.
 */
class TmpNode implements Comparable<TmpNode> {
    /** The byte value being counted. */
    public byte Byte;
    /** Number of occurrences counted so far. */
    public int weight;

    /**
     * Orders nodes by descending weight.
     *
     * @param arg0 node to compare against
     * @return a negative value if this node is heavier, a positive value if
     *         it is lighter, and 0 if both weights are equal
     */
    @Override
    public int compareTo(TmpNode arg0) {
        // Arguments are swapped on purpose to obtain descending order.
        return Integer.compare(arg0.weight, this.weight);
    }
}
/**
 * Orders Huffman tree nodes by ascending weight, so that a priority queue
 * built with this comparator hands out the lightest node first.
 */
class Compare implements Comparator<HufTree> {
    /**
     * Compares two nodes by weight.
     *
     * @param o1 first node
     * @param o2 second node
     * @return a negative value if {@code o1} is lighter, a positive value if
     *         it is heavier, and 0 if both weights are equal
     */
    @Override
    public int compare(HufTree o1, HufTree o2) {
        return Integer.compare(o1.weight, o2.weight);
    }
}