请教:POI中抽取WORD2007内容出现问题
环境是 weblogic9.2 + jdk1.5,参照下面这个帖子写的抽取方法:
http://www.cnblogs.com/gaoyoubo/articles/1759383.html
/**
 * Extracts the plain text of a Word 2007 (.docx) document.
 *
 * <p>The OPC package opened for the file is always released through
 * {@code revert()}, which closes it without writing anything back, so the
 * file handle is not leaked and the document on disk is never modified.
 *
 * @param fileName path of the .docx file to read
 * @return the text produced by the extractor for the document
 * @throws IOException declared by the POI calls used to open and read the file
 * @throws OpenXML4JException declared by the POI calls that process the OPC package
 * @throws XmlException declared by the POI calls that parse the document XML
 */
public static String extractTextFromDOC2007(String fileName) throws
IOException, OpenXML4JException, XmlException {
    OPCPackage opcPackage = POIXMLDocument.openPackage(fileName);
    try {
        // Constructing the extractor is the call reported as failing at
        // Office2007Extractor.java:118 in the stack trace.
        POIXMLTextExtractor ex = new XWPFWordExtractor(opcPackage);
        return ex.getText();
    } finally {
        // revert() rather than close(): close() would try to save the package.
        opcPackage.revert();
    }
}
用到包:
/**
* * 实际本工程中需要的jar包:
* poi-3.6-20091214.jar
* poi-contrib-3.6-20091214.jar
* poi-ooxml-3.6-20091214.jar
* poi-ooxml-schemas-3.6-20091214.jar
* poi-scratchpad-3.6-20091214.jar
* xmlbeans-2.3.0.jar
* geronimo-stax-api_1.0_spec-1.0.jar
* dom4j-1.6.1.jar
*/
异常错误:
org.apache.poi.POIXMLException: java.lang.reflect.InvocationTargetException
at org.apache.poi.xwpf.usermodel.XWPFFactory.createDocumentPart(XWPFFactory.java:60)
at org.apache.poi.POIXMLDocumentPart.read(POIXMLDocumentPart.java:256)
at org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:196)
at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:94)
at org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:45)
at com.zzxy.common.office.Office2007Extractor.extractTextFromDOC2007(Office2007Extractor.java:118)
at com.zzxy.common.search.FileDocument.Document(FileDocument.java:144)
at com.zzxy.common.search.IndexFiles.addSingleIndex(IndexFiles.java:490)
at com.zzxy.common.search.IndexFiles.indexDocs(IndexFiles.java:432)
at com.zzxy.common.search.IndexFiles.createZLKIndex(IndexFiles.java:117)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.reIndexDocs(ZX_ZHGL_ZLKWH_Agent.java:784)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.addZLKFile(ZX_ZHGL_ZLKWH_Agent.java:283)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.execute(ZX_ZHGL_ZLKWH_Agent.java:96)
at com.zzxy.model.BusinessAgent.service(BusinessAgent.java:87)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:856)
at weblogic.servlet.internal.StubSecurityHelper$ServletServiceAction.run(StubSecurityHelper.java:225)
at weblogic.servlet.internal.StubSecurityHelper.invokeServlet(StubSecurityHelper.java:127)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:283)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:175)
at weblogic.servlet.internal.RequestDispatcherImpl.invokeServlet(RequestDispatcherImpl.java:499)
at weblogic.servlet.internal.RequestDispatcherImpl.forward(RequestDispatcherImpl.java:245)
at com.zzxy.control.HTTPController.service(HTTPController.java:93)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:856)
at weblogic.servlet.internal.StubSecurityHelper$ServletServiceAction.run(StubSecurityHelper.java:225)
at weblogic.servlet.internal.StubSecurityHelper.invokeServlet(StubSecurityHelper.java:127)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:283)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:175)
at weblogic.servlet.internal.WebAppServletContext$ServletInvocationAction.run(WebAppServletContext.java:3214)
at weblogic.security.acl.internal.AuthenticatedSubject.doAs(AuthenticatedSubject.java:321)
at weblogic.security.service.SecurityManager.runAs(SecurityManager.java:121)
at weblogic.servlet.internal.WebAppServletContext.securedExecute(WebAppServletContext.java:1983)
at weblogic.servlet.internal.WebAppServletContext.execute(WebAppServletContext.java:1890)
at weblogic.servlet.internal.ServletRequestImpl.run(ServletRequestImpl.java:1344)
at weblogic.work.ExecuteThread.execute(ExecuteThread.java:209)
at weblogic.work.ExecuteThread.run(ExecuteThread.java:181)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:494)
at org.apache.poi.xwpf.usermodel.XWPFFactory.createDocumentPart(XWPFFactory.java:58)
... 34 more
Caused by: java.lang.ExceptionInInitializerError
at sun.misc.Unsafe.ensureClassInitialized(Native Method)
at sun.reflect.UnsafeFieldAccessorFactory.newFieldAccessor(UnsafeFieldAccessorFactory.java:25)
at sun.reflect.ReflectionFactory.newFieldAccessor(ReflectionFactory.java:122)
at java.lang.reflect.Field.acquireFieldAccessor(Field.java:917)
at java.lang.reflect.Field.getFieldAccessor(Field.java:898)
at java.lang.reflect.Field.get(Field.java:357)
at org.apache.xmlbeans.XmlBeans.typeSystemForClassLoader(XmlBeans.java:770)
at org.openxmlformats.schemas.wordprocessingml.x2006.main.SettingsDocument.<clinit>(Unknown Source)
at org.openxmlformats.schemas.wordprocessingml.x2006.main.SettingsDocument$Factory.parse(Unknown Source)
at org.apache.poi.xwpf.usermodel.XWPFSettings.readFrom(XWPFSettings.java:129)
at org.apache.poi.xwpf.usermodel.XWPFSettings.<init>(XWPFSettings.java:43)
... 39 more
Caused by: java.lang.RuntimeException: Could not instantiate SchemaTypeSystemImpl (java.lang.reflect.InvocationTargetException): is the version of xbean.jar correct?
at schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.TypeSystemHolder.loadTypeSystem(Unknown Source)
at schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.TypeSystemHolder.<clinit>(Unknown Source)
... 50 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:494)
... 52 more
Caused by: org.apache.xmlbeans.SchemaTypeLoaderException: XML-BEANS compiled schema: Incompatible minor version - expecting up to 23, got 24 (schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.index) - code 3
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl$XsbReader.<init>(SchemaTypeSystemImpl.java:1522)
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl.initFromHeader(SchemaTypeSystemImpl.java:260)
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl.<init>(SchemaTypeSystemImpl.java:183)
... 56 more --------------------编程问答-------------------- 没有人关注这块么?? --------------------编程问答-------------------- 打开 word 看看那行是什么 --------------------编程问答-------------------- 楼上的,这跟 word 文件有什么关系
拜托看清楚问题 --------------------编程问答--------------------
帮你解决问题 你却如此态度...领教了~! --------------------编程问答-------------------- 楼上的,不好意思。
主要你都没有看问题
这都不是word文件的问题,是poi 问题或者包冲突问题
--------------------编程问答-------------------- 如果我单独建立工程,参照帖写的抽取方法
http://www.cnblogs.com/gaoyoubo/articles/1759383.html
是正常的,但是如果加入工程中就会有如下错误:
初步估计是包冲突,但是又找不到是跟哪个包冲突。
word文件名字=====D:/word 2007.docx
org.apache.poi.POIXMLException: java.lang.reflect.InvocationTargetException
at org.apache.poi.xwpf.usermodel.XWPFFactory.createDocumentPart(XWPFFactory.java:60)
at org.apache.poi.POIXMLDocumentPart.read(POIXMLDocumentPart.java:256)
at org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:196)
at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:94)
at org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:45)
at com.zzxy.common.office.Office2007Extractor.extractTextFromDOC2007(Office2007Extractor.java:118)
at com.zzxy.common.search.FileDocument.Document(FileDocument.java:145)
at com.zzxy.common.search.IndexFiles.addSingleIndex(IndexFiles.java:490)
at com.zzxy.common.search.IndexFiles.indexDocs(IndexFiles.java:432)
at com.zzxy.common.search.IndexFiles.createZLKIndex(IndexFiles.java:117)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.reIndexDocs(ZX_ZHGL_ZLKWH_Agent.java:784)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.addZLKFile(ZX_ZHGL_ZLKWH_Agent.java:283)
at com.zzxy.model.zx.zhgl.ZX_ZHGL_ZLKWH_Agent.execute(ZX_ZHGL_ZLKWH_Agent.java:96)
at com.zzxy.model.BusinessAgent.service(BusinessAgent.java:87)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:856)
at weblogic.servlet.internal.StubSecurityHelper$ServletServiceAction.run(StubSecurityHelper.java:225)
at weblogic.servlet.internal.StubSecurityHelper.invokeServlet(StubSecurityHelper.java:127)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:283)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:175)
at weblogic.servlet.internal.RequestDispatcherImpl.invokeServlet(RequestDispatcherImpl.java:499)
at weblogic.servlet.internal.RequestDispatcherImpl.forward(RequestDispatcherImpl.java:245)
at com.zzxy.control.HTTPController.service(HTTPController.java:93)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:856)
at weblogic.servlet.internal.StubSecurityHelper$ServletServiceAction.run(StubSecurityHelper.java:225)
at weblogic.servlet.internal.StubSecurityHelper.invokeServlet(StubSecurityHelper.java:127)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:283)
at weblogic.servlet.internal.ServletStubImpl.execute(ServletStubImpl.java:175)
at weblogic.servlet.internal.WebAppServletContext$ServletInvocationAction.run(WebAppServletContext.java:3214)
at weblogic.security.acl.internal.AuthenticatedSubject.doAs(AuthenticatedSubject.java:321)
at weblogic.security.service.SecurityManager.runAs(SecurityManager.java:121)
at weblogic.servlet.internal.WebAppServletContext.securedExecute(WebAppServletContext.java:1983)
at weblogic.servlet.internal.WebAppServletContext.execute(WebAppServletContext.java:1890)
at weblogic.servlet.internal.ServletRequestImpl.run(ServletRequestImpl.java:1344)
at weblogic.work.ExecuteThread.execute(ExecuteThread.java:209)
at weblogic.work.ExecuteThread.run(ExecuteThread.java:181)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:494)
at org.apache.poi.xwpf.usermodel.XWPFFactory.createDocumentPart(XWPFFactory.java:58)
... 34 more
Caused by: java.lang.ExceptionInInitializerError
at sun.misc.Unsafe.ensureClassInitialized(Native Method)
at sun.reflect.UnsafeFieldAccessorFactory.newFieldAccessor(UnsafeFieldAccessorFactory.java:25)
at sun.reflect.ReflectionFactory.newFieldAccessor(ReflectionFactory.java:122)
at java.lang.reflect.Field.acquireFieldAccessor(Field.java:917)
at java.lang.reflect.Field.getFieldAccessor(Field.java:898)
at java.lang.reflect.Field.get(Field.java:357)
at org.apache.xmlbeans.XmlBeans.typeSystemForClassLoader(XmlBeans.java:770)
at org.openxmlformats.schemas.wordprocessingml.x2006.main.SettingsDocument.<clinit>(Unknown Source)
at org.openxmlformats.schemas.wordprocessingml.x2006.main.SettingsDocument$Factory.parse(Unknown Source)
at org.apache.poi.xwpf.usermodel.XWPFSettings.readFrom(XWPFSettings.java:129)
at org.apache.poi.xwpf.usermodel.XWPFSettings.<init>(XWPFSettings.java:43)
... 39 more
Caused by: java.lang.RuntimeException: Could not instantiate SchemaTypeSystemImpl (java.lang.reflect.InvocationTargetException): is the version of xbean.jar correct?
at schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.TypeSystemHolder.loadTypeSystem(Unknown Source)
at schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.TypeSystemHolder.<clinit>(Unknown Source)
... 50 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:494)
... 52 more
Caused by: org.apache.xmlbeans.SchemaTypeLoaderException: XML-BEANS compiled schema: Incompatible minor version - expecting up to 23, got 24 (schemaorg_apache_xmlbeans.system.sE130CAA0A01A7CDE5A2B4FEB8B311707.index) - code 3
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl$XsbReader.<init>(SchemaTypeSystemImpl.java:1522)
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl.initFromHeader(SchemaTypeSystemImpl.java:260)
at org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl.<init>(SchemaTypeSystemImpl.java:183)
... 56 more --------------------编程问答-------------------- XML-BEANS compiled schema: Incompatible minor version - expecting up to 23, got 24
对poi不熟悉,但是从异常上来看,似乎是xml-beans的版本不正确。 --------------------编程问答--------------------
--------------------编程问答--------------------
package test;
// 生成Excel的类
import java.io.File;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
/**
 * Demo that generates an Excel file with JExcelApi.
 */
public class CreateExcel {
/**
 * Writes aa.xls with a single sheet: a 10 x 10 grid of label cells, each
 * holding the product of its two loop indices, followed by one numeric cell.
 * Any exception is printed to standard output.
 */
public static void main(String args[]) {
try {
    // Create the output workbook file.
    File target = new File("aa.xls");
    WritableWorkbook workbook = Workbook.createWorkbook(target);
    // Sheet named "第一页"; index 0 makes it the first sheet.
    WritableSheet firstSheet = workbook.createSheet("第一页", 0);
    // Label arguments are (column, row, text) per the JExcelApi docs.
    for (int col = 0; col < 10; col++) {
        for (int row = 0; row < 10; row++) {
            firstSheet.addCell(new Label(col, row, String.valueOf(col * row)));
        }
    }
    // Number must be fully qualified to avoid ambiguity with java.lang.Number.
    // It is added at (1, 0), a position the loop above has already filled.
    firstSheet.addCell(new jxl.write.Number(1, 0, 555.12541));
    // Flush the data to the file, then close it.
    workbook.write();
    workbook.close();
} catch (Exception failure) {
    System.out.println(failure);
}
}
}
--------------------编程问答--------------------
package test;
// 读取Excel的类
import java.io.File;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
/**
 * Demo that reads an Excel file with JExcelApi and prints every cell.
 */
public class ReadExcel {
/**
 * Prints the contents of the first sheet of test.xls row by row: each cell
 * is followed by a comma and each row by a dashed separator line.
 * Any exception is printed to standard output.
 */
public static void main(String args[]) {
try {
    // File name has no trailing space, so it resolves on file systems that
    // do not strip trailing blanks and matches the name used by UpdateExcel.
    Workbook book = Workbook.getWorkbook(new File("test.xls"));
    try {
        // First sheet of the workbook.
        Sheet sheet = book.getSheet(0);
        int rows = sheet.getRows();
        int columns = sheet.getColumns();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < columns; j++) {
                // getCell takes (column, row).
                Cell cell = sheet.getCell(j, i);
                System.out.print(cell.getContents() + ",");
            }
            System.out.println("-----------------------------");
        }
    } finally {
        // Release the workbook even when reading fails part-way.
        book.close();
    }
} catch (Exception e) {
    System.out.println(e);
}
}
}
--------------------编程问答-------------------- POI,对office2007的格式docx有些不支持,90%的可能性是文件的问题.你看看使用低版本的word格式是否再出现问题.
package test;
import java.io.File;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
/**
 * Demo that adds a sheet to an existing Excel file with JExcelApi.
 */
public class UpdateExcel {
/**
 * Opens test.xls, creates a writable copy that is written back to the same
 * file, adds a second sheet with one label cell, and saves it.
 * Any exception is printed to standard output.
 */
public static void main(String args[]) {
try {
    // Read the existing workbook.
    Workbook wb = Workbook.getWorkbook(new File("test.xls"));
    try {
        // Writable copy of wb whose output goes back to the original file.
        WritableWorkbook book = Workbook.createWorkbook(new File("test.xls"), wb);
        // Add a sheet at index 1.
        WritableSheet sheet = book.createSheet("第二页 ", 1);
        sheet.addCell(new Label(0, 0, "第二页的测试数据 "));
        book.write();
        book.close();
    } finally {
        // The source workbook was never closed before; release it here.
        wb.close();
    }
} catch (Exception e) {
    System.out.println(e);
}
}
}
还有,你请教的态度出现问题,搞程序应该考虑到所有的可能性,什么可能性都应该去试试 --------------------编程问答-------------------- 问题解决了,总结下: 主要原因不是poi不支持问题,是加载包冲突。weblogic本身用到包优先加载引起的。 --------------------编程问答-------------------- 应该是包冲突,我单独建工程,没错。
我在web项目中,不启动tomcat,有错,启动tomcat后,还是有错。就应该是包冲突。
我的是放了poi3.5 和 poi3.7. --------------------编程问答-------------------- --------------------编程问答-------------------- 暂时别去解析2007吧,问题很多的。 --------------------编程问答--------------------
怎么就没了,可不可以把解决办法贴出来啊,让遇到这个问题的人继续头疼,是何解啊? --------------------编程问答--------------------
能否说说问题的解决过程吗?在线等.............. --------------------编程问答-------------------- POI的强项是excel,不是word! --------------------编程问答-------------------- 换成java6 试试, 碰到过类似的
补充:Java , Java EE