java word 轉換成html

java代碼:

package com.cong.quartz.util;

import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;

/**
 * word 轉(zhuǎn)換成html
 */
public class WordToHtml {

    /**
     * docx轉(zhuǎn)換成html
     */
    public static void Word2007ToHtml(String datestr,String id,String docpath,String htmlpath, String docname, String htmlname) throws IOException {
        ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx
        final String file = docpath + "/" + datestr + "/" + docname;
        File f = new File(file);
        if (!f.exists()) {
            System.out.println("Sorry File does not Exists!");
        } else {
            if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {

                // 1) 加載word文檔生成 XWPFDocument對(duì)象
                InputStream in = new FileInputStream(f);
                XWPFDocument document = new XWPFDocument(in);

                // 2) 解析 XHTML配置 (這里設(shè)置IURIResolver來(lái)設(shè)置圖片存放的目錄)
                ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png
                File htmlFolderFile = new File(htmlpath + "/" + datestr);
                if(!htmlFolderFile.exists()){
                    htmlFolderFile.mkdirs();
                }
                File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id);
                if(!imageFolderFile.exists()){
                    imageFolderFile.mkdirs();
                }
                //XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
                XHTMLOptions options = XHTMLOptions.create();;
                options.setExtractor(new FileImageExtractor(imageFolderFile));
                //圖片位置---這里需要改變
                options.URIResolver(new BasicURIResolver(id));
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);

                // 2) Prepare XHTML options (here we set the IURIResolver to load images from a "word/media" folder)
                // 3) 將 XWPFDocument轉(zhuǎn)換成XHTML
                ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/00b4fe3d59ac486187f2f5173e359075.html
                String targetFileName = htmlpath + "/" + datestr + "/" + htmlname;
                OutputStreamWriter outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, outputStreamWriter, options);
                //OutputStream out = new FileOutputStream(new File(htmlpath + "/" + datestr + "/" + htmlname));
                //XHTMLConverter.getInstance().convert(document, out, options);
            } else {
                System.out.println("Enter only MS Office 2007+ files");
            }
        }
    }

    /**
     * doc轉(zhuǎn)換成html
     */
    public static void convert2Html(String datestr,String id,String docpath,String htmlpath,String docname, String htmlname)
            throws TransformerException, IOException,
            ParserConfigurationException {
        ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx
        File htmlFolderFile = new File(htmlpath + "/" + datestr);
        if(!htmlFolderFile.exists()){
            htmlFolderFile.mkdirs();
        }
        File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id);
        if(!imageFolderFile.exists()){
            imageFolderFile.mkdirs();
        }
        HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(docpath+"/"+datestr+"/"+docname));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager( new PicturesManager()
        {
            public String savePicture(byte[] content,
                                      PictureType pictureType, String suggestedName,
                                      float widthInches, float heightInches )
            {
                return htmlpath + "/" + datestr + "/" + id + "/" +suggestedName;
            }
        } );
        wordToHtmlConverter.processDocument(wordDocument);
        //save pictures
        List pics=wordDocument.getPicturesTable().getAllPictures();
        if(pics!=null){
            for(int i=0;i<pics.size();i++){
                Picture pic = (Picture)pics.get(i);
                System.out.println();
                try {
                    ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png
                    pic.writeImageContent(new FileOutputStream(htmlpath + "/" + datestr + "/" + id + "/" + pic.suggestFullFileName()));
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        //serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        out.close();

        String content = new String(out.toByteArray());
        String imgWebPath = htmlpath + "/" + datestr + "/" + id;
        content = content.replace(imgWebPath, id);
        String targetFilePath = htmlpath + "/" + datestr + "/" +htmlname;
        FileUtils.writeStringToFile(new File(targetFilePath), content, "utf-8");
        //writeFile(new String(out.toByteArray()), htmlpath + "/" + datestr + "/" +htmlname);
    }

    public static void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            File file = new File(path);
            fos = new FileOutputStream(file);

            //    bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312"));
            bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));
            bw.write(content);
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            try {
                if (bw != null)
                    bw.close();
                if (fos != null)
                    fos.close();
            } catch (IOException ie) {
            }
        }
    }

}

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

友情鏈接更多精彩內(nèi)容