You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
155 lines
5.1 KiB
155 lines
5.1 KiB
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Enumeration;

import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
|
|
|
|
|
|
public class Mht2HtmlUtil {
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
mht2html("C:\\Users\\Administrator\\Desktop\\028初中阶段校数、班数.mht", "d:\\test.htm");
|
|
}
|
|
|
|
|
|
/**
|
|
* 将 mht文件转换成 html文件
|
|
*
|
|
* @param srcMht // mht 文件的位置
|
|
* @param descHtml // 转换后输出的HTML的位置
|
|
*/
|
|
public static void mht2html(String srcMht, String descHtml) {
|
|
try {
|
|
InputStream fis = new FileInputStream(srcMht);
|
|
Session mailSession = Session.getDefaultInstance(
|
|
System.getProperties(), null);
|
|
MimeMessage msg = new MimeMessage(mailSession, fis);
|
|
Object content = msg.getContent();
|
|
if (content instanceof Multipart) {
|
|
MimeMultipart mp = (MimeMultipart) content;
|
|
MimeBodyPart bp1 = (MimeBodyPart) mp.getBodyPart(0);
|
|
|
|
// 获取mht文件内容代码的编码
|
|
String strEncodng = getEncoding(bp1);
|
|
|
|
// 获取mht文件的内容
|
|
String strText = getHtmlText(bp1, strEncodng);
|
|
if (strText == null)
|
|
return;
|
|
// 最后保存HTML文件
|
|
SaveHtml(strText, descHtml);
|
|
}
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* 将提取出来的html内容写入保存的路径中。
|
|
*
|
|
* @param s_HtmlTxt
|
|
* @param s_HtmlPath
|
|
*/
|
|
public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath) {
|
|
try {
|
|
Writer out = null;
|
|
out = new OutputStreamWriter(
|
|
new FileOutputStream(s_HtmlPath, false), "utf-8");
|
|
out.write(s_HtmlTxt);
|
|
out.close();
|
|
} catch (Exception e) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* 获取mht文件中的内容代码
|
|
*
|
|
* @param bp
|
|
* @param strEncoding 该mht文件的编码
|
|
* @return
|
|
*/
|
|
private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
|
|
InputStream textStream = null;
|
|
BufferedInputStream buff = null;
|
|
BufferedReader br = null;
|
|
Reader r = null;
|
|
try {
|
|
textStream = bp.getInputStream();
|
|
buff = new BufferedInputStream(textStream);
|
|
r = new InputStreamReader(buff, strEncoding);
|
|
br = new BufferedReader(r);
|
|
StringBuffer strHtml = new StringBuffer("");
|
|
String strLine = null;
|
|
while ((strLine = br.readLine()) != null) {
|
|
System.out.println(strLine);
|
|
strHtml.append(strLine + "\r\n");
|
|
}
|
|
br.close();
|
|
r.close();
|
|
textStream.close();
|
|
return strHtml.toString();
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
} finally {
|
|
try {
|
|
if (br != null)
|
|
br.close();
|
|
if (buff != null)
|
|
buff.close();
|
|
if (textStream != null)
|
|
textStream.close();
|
|
} catch (Exception e) {
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* 获取mht网页文件中内容代码的编码
|
|
*
|
|
* @param bp
|
|
* @return
|
|
*/
|
|
private static String getEncoding(MimeBodyPart bp) {
|
|
if (bp == null) {
|
|
return null;
|
|
}
|
|
try {
|
|
Enumeration list = bp.getAllHeaders();
|
|
while (list.hasMoreElements()) {
|
|
javax.mail.Header head = (javax.mail.Header) list.nextElement();
|
|
if (head.getName().equalsIgnoreCase("Content-Type")) {
|
|
String strType = head.getValue();
|
|
int pos = strType.indexOf("charset=");
|
|
if (pos >= 0) {
|
|
String strEncoding = strType.substring(pos + 8,
|
|
strType.length());
|
|
if (strEncoding.startsWith("\"")
|
|
|| strEncoding.startsWith("\'")) {
|
|
strEncoding = strEncoding.substring(1,
|
|
strEncoding.length());
|
|
}
|
|
if (strEncoding.endsWith("\"")
|
|
|| strEncoding.endsWith("\'")) {
|
|
strEncoding = strEncoding.substring(0,
|
|
strEncoding.length() - 1);
|
|
}
|
|
if (strEncoding.toLowerCase().compareTo("gb2312") == 0) {
|
|
strEncoding = "gbk";
|
|
}
|
|
return strEncoding;
|
|
}
|
|
}
|
|
}
|
|
} catch (MessagingException e) {
|
|
e.printStackTrace();
|
|
}
|
|
return null;
|
|
}
|
|
} |