java-poi4.0.1讀取word文本和圖片

package per.qy.dexter.fileoperate;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.UUID;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.junit.Test;

public class WordTest {

    @Test
    public void testWord() {
        // String path = "D:\\temp\\temp\\test.doc";
        String path = "D:\\temp\\temp\\test.docx";
        String content = null;
        File file = new File(path);
        if (file.exists() && file.isFile()) {
            InputStream is = null;
            HWPFDocument doc = null;
            XWPFDocument docx = null;
            POIXMLTextExtractor extractor = null;
            try {
                is = new FileInputStream(file);
                if (path.endsWith(".doc")) {
                    doc = new HWPFDocument(is);

                    // 文檔文本內(nèi)容
                    content = doc.getDocumentText();

                    // 文檔圖片內(nèi)容
                    PicturesTable picturesTable = doc.getPicturesTable();
                    List<Picture> pictures = picturesTable.getAllPictures();
                    for (Picture picture : pictures) {
                        // 輸出圖片到磁盤
                        OutputStream out = new FileOutputStream(
                                new File("D:\\temp\\" + UUID.randomUUID() + "." + picture.suggestFileExtension()));
                        picture.writeImageContent(out);
                        out.close();
                    }
                } else if (path.endsWith("docx")) {
                    docx = new XWPFDocument(is);
                    extractor = new XWPFWordExtractor(docx);

                    // 文檔文本內(nèi)容
                    content = extractor.getText();

                    // 文檔圖片內(nèi)容
                    List<XWPFPictureData> pictures = docx.getAllPictures();
                    for (XWPFPictureData picture : pictures) {
                        byte[] bytev = picture.getData();
                        // 輸出圖片到磁盤
                        FileOutputStream out = new FileOutputStream(
                                "D:\\temp\\temp\\" + UUID.randomUUID() + picture.getFileName());
                        out.write(bytev);
                        out.close();
                    }
                } else {
                    System.out.println("此文件不是word文件!");
                }
                System.out.println(content);
            } catch (FileNotFoundException e) {
            } catch (IOException e) {
            } finally {
                try {
                    if (doc != null) {
                        doc.close();
                    }
                    if (extractor != null) {
                        extractor.close();
                    }
                    if (docx != null) {
                        docx.close();
                    }
                    if (is != null) {
                        is.close();
                    }
                } catch (IOException e) {
                }
            }
        }
    }

}
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容