首先需要对 PDF 文件进行解析，然后才能把解析出的内容显示出来。
解析 PDF 文件需要用到 PDFBox 的 jar 包，
下面是解析 PDF 的代码：
package com.lingjoin.extractors;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.util.PDFTextStripper;
import com.lingjoin.paser.LingJoinFile;
/**
 * Extractor that pulls plain text (and the author metadata field) out of a
 * PDF file using PDFBox.
 *
 * <p>Extraction is best-effort: an {@link IOException} raised while loading,
 * reading, or closing the document is printed to standard error and the
 * extracted content falls back to the empty string.
 *
 * @author Ansj
 */
public class PDFExtractor extends AbstractExtractor {

    /**
     * Type flag supplied at construction time and handed back unchanged by
     * {@link #getTypeFlag()}. NOTE(review): its meaning is defined outside
     * this class - confirm against AbstractExtractor and its callers.
     */
    private final Integer typeFlag;

    /**
     * Creates an extractor carrying the given type flag.
     *
     * @param typeFlag value later returned by {@link #getTypeFlag()}
     */
    public PDFExtractor(Integer typeFlag) {
        this.typeFlag = typeFlag;
    }

    /**
     * Returns the type flag this extractor was constructed with.
     *
     * @return the constructor argument, unchanged
     */
    public Integer getTypeFlag() {
        return this.typeFlag;
    }

    /**
     * Extracts the text of {@code f} and stores it on the file as a reader.
     *
     * @param f the file to parse; receives the reader via
     *          {@code setlContentReader}
     * @throws Exception declared by the signature; the extraction performed
     *         here handles I/O failures itself
     */
    public void paserFileToReader(LingJoinFile f) throws Exception {
        BufferedReader contentReader = this.getContentReader(f);
        f.setlContentReader(contentReader);
    }

    /**
     * Extracts the text of {@code f} and stores it on the file as a string.
     *
     * @param f the file to parse; receives the text via {@code setlContent}
     * @throws Exception declared by the signature; the extraction performed
     *         here handles I/O failures itself
     */
    public void paserFileToString(LingJoinFile f) throws Exception {
        String extractedText = this.getContent(f);
        f.setlContent(extractedText);
    }

    /**
     * Wraps the extracted text of {@code f} in a buffered reader.
     *
     * @param f the file to parse
     * @return a reader over the extracted text (over an empty string when
     *         extraction failed)
     */
    private BufferedReader getContentReader(LingJoinFile f) {
        StringReader textSource = new StringReader(this.getContent(f));
        return new BufferedReader(textSource);
    }

    /**
     * Loads {@code f} as a PDF document, copies its metadata onto {@code f}
     * and returns the document text.
     *
     * @param f the file to parse
     * @return the extracted text, or {@code ""} if an {@link IOException}
     *         occurred while loading or reading the document
     */
    private String getContent(LingJoinFile f) {
        String text = "";
        PDDocument document = null;
        try {
            document = PDDocument.load(f);
            // The stripper is created before the metadata is touched so that a
            // failure here leaves the file's metadata unmodified.
            PDFTextStripper textStripper = new PDFTextStripper();
            PDDocumentInformation metadata = document.getDocumentInformation();
            this.setLingJoinFileInfo(f, metadata);
            text = textStripper.getText(document);
        } catch (IOException ex) {
            // Also covers FileNotFoundException, which is an IOException.
            ex.printStackTrace();
        } finally {
            releaseDocument(document);
        }
        return text;
    }

    /**
     * Closes the given document if one was opened, printing (rather than
     * propagating) any {@link IOException} raised by the close.
     *
     * @param document the document to close; may be {@code null}
     */
    private static void releaseDocument(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Copies document metadata onto the file. Only the author is transferred,
     * and only when the document actually declares one.
     *
     * <p>Project: FilePaser. Created by ANSJ on 2010-04-14.
     *
     * @param f    the file receiving the metadata
     * @param info the document information read from the PDF
     */
    private void setLingJoinFileInfo(LingJoinFile f, PDDocumentInformation info) {
        String author = info.getAuthor();
        if (author == null) {
            return;
        }
        f.setlAuthor(author);
    }
}