`
kjkhi
  • 浏览: 181447 次
  • 性别: Icon_minigender_1
  • 来自: 广州
社区版块
存档分类
最新评论

Apache POI组件使用eventusermodel模式读取Excel文档内容

阅读更多
package com.test;

import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class Test {
	
	public static void main(String[] args) {
		long a=System.currentTimeMillis();
		
		ExecThread thread = new ExecThread();
		thread.start();
		
		try {
			thread.join();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		
		System.out.println("Main ... "+Thread.currentThread().getName());
		
		System.out.println("执行耗时 :"+(System.currentTimeMillis()-a)/1000f+" 秒 ");
	}
	
	public static class ExecThread extends Thread{
		
		public ExecThread(){}

		/* (non-Javadoc)
		 * @see java.lang.Thread#run()
		 */
		@Override
		public void run() {
			try {
				FileWriter fileWriter = new FileWriter("D:/test-out.txt");
				String excelPath = "D:/test.xlsx";
				new ExcelHandler(fileWriter,excelPath,"1","").start();
				System.out.println("ExecThread ... "+Thread.currentThread().getName());
			} catch (IOException e) {
				e.printStackTrace();
			}
			
		}
		
	}
	
	public static class ExcelHandler extends DefaultHandler{
		private final String excelPath;
		private final String sheetIndex;
		private final String nullString;
		
		/**
	     * 从<c />标签得到的单元格格式,获取当前单元格数值需要用到
	     */
	    private enum XSSFDataType {
	        BOOL,
	        ERROR,
	        FORMULA,
	        INLINESTR,
	        SSTINDEX,
	        NUMBER,
	    }
		
		private XSSFReader xssfReader;
		private ReadOnlySharedStringsTable sst;
		private StylesTable stylesTable;
		
		private boolean vIsOpen = false;
		private StringBuffer value = new StringBuffer();
		protected XSSFDataType nextDataType;
		// 当前遍历的Excel单元格列索引
		protected int thisColumnIndex = -1;
	    // The last column printed to the output stream
	    protected int lastColumnIndex = -1;
	    protected int maxColumnCount = -1;
	    protected int formatIndex;
	    protected String formatString;
	    private final DataFormatter formatter = new DataFormatter();
		
		private List<String> rowDatas = null;
		
		
	    private String targetFile;
		
		private FileWriter writer;
		public ExcelHandler(String targetFile, String excelPath, 
				String sheetIndex, String nullString){
//			this.writer = writer;
			this.targetFile = targetFile;
			this.excelPath = excelPath;
			this.sheetIndex = sheetIndex;
			this.nullString = nullString;
		}
		
		public ExcelHandler(FileWriter writer, String excelPath, 
				String sheetIndex, String nullString){
			this.writer = writer;
			this.excelPath = excelPath;
			this.sheetIndex = sheetIndex;
			this.nullString = nullString;
		}
		
		/**
		 * 初始化
		 */
		public void start(){
			try {
				OPCPackage op = OPCPackage.open(this.excelPath,PackageAccess.READ);
				this.xssfReader = new XSSFReader(op);
				this.sst = new ReadOnlySharedStringsTable(op);
				this.stylesTable = xssfReader.getStylesTable();
				
				// 开始解析
				parseXmlContent(new InputSource(getOneSheetStream(sheetIndex)));
			} catch (Exception e) {
				e.printStackTrace();
			}
			
		}
		
		
		
		/* (non-Javadoc)
		 * @see org.xml.sax.helpers.DefaultHandler#startDocument()
		 */
		@Override
		public void startDocument() throws SAXException {
			try {
				if(null == writer && StringUtils.isNotBlank(targetFile)) {
					writer = new FileWriter(this.targetFile);
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
			System.out.println("start Excel document ... "+Thread.currentThread().getName());
		}

		/* (non-Javadoc)
		 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
		 */
		@Override
		public void startElement(String uri, String localName, String qName,
				Attributes attributes) throws SAXException {
			if ("inlineStr".equals(qName) || "v".equals(qName)) {
				vIsOpen = true;
				// Clear contents cache
				value.setLength(0);
			}
			// c => cell
			else if ("c".equals(qName)) {
				// Get the cell reference
				String r = attributes.getValue("r");
				int firstDigit = -1;
				for (int c = 0; c < r.length(); ++c) {
					if (Character.isDigit(r.charAt(c))) {
						firstDigit = c;
						break;
					}
				}
				// 当前列索引
				thisColumnIndex = nameToColumn(r.substring(0, firstDigit));
				
				// Set up defaults.
				this.nextDataType = XSSFDataType.NUMBER;
				this.formatIndex = -1;
				this.formatString = null;
				String cellType = attributes.getValue("t");
				String cellStyleStr = attributes.getValue("s");
				if ("b".equals(cellType))
					nextDataType = XSSFDataType.BOOL;
				else if ("e".equals(cellType))
					nextDataType = XSSFDataType.ERROR;
				else if ("inlineStr".equals(cellType))
					nextDataType = XSSFDataType.INLINESTR;
				else if ("s".equals(cellType))
					nextDataType = XSSFDataType.SSTINDEX;
				else if ("str".equals(cellType))
					nextDataType = XSSFDataType.FORMULA;
				else if (cellStyleStr != null) {
					// It's a number, but almost certainly one
					//  with a special style or format 
					int styleIndex = Integer.parseInt(cellStyleStr);
					XSSFCellStyle style = this.stylesTable.getStyleAt(styleIndex);
					this.formatIndex = style.getDataFormat();
					this.formatString = style.getDataFormatString();
					if (this.formatString == null)
						this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
				}
			}
			// row => 行开始
			else if ("row".equals(qName)) {
//				System.out.println("------------ row ---------------");
				
//				line = resultWriter.createLine();
				rowDatas = new LinkedList<String>();
				
			}
		}

		/* (non-Javadoc)
		 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
		 */
		@Override
		public void endElement(String uri, String localName, String qName)
				throws SAXException {
			Object thisObj = null;
			
			if("v".equals(qName)) {
				
				// 当前单元格数据
				thisObj = extractCellValue(value,nextDataType);
				
//				System.out.println("current cell value is : "+thisObj);
				
				// Output after we've seen the string contents
				// Emit commas for any fields that were missing on this row
				if (lastColumnIndex == -1) {
					lastColumnIndex = 0;
				}
				try {
					for (int i = lastColumnIndex; i < thisColumnIndex; ++i) {
//						rowDatas.append(this.fieldSplit);
//						line.addField(nullString);
						rowDatas.add(nullString);
						writer.write("|");
					}
					// Might be the empty string.
//					line.addField(thisObj==null?nullString:String.valueOf(thisObj));
					rowDatas.add(thisObj==null?nullString:String.valueOf(thisObj));
					writer.write(thisObj==null?nullString:String.valueOf(thisObj));
				}catch (Exception e) {
					e.printStackTrace();
				}
				
				// Update column
				if (thisColumnIndex > -1)
					lastColumnIndex = thisColumnIndex;
			}else if("row".equals(qName)) {
				try {
					// Print out any missing commas if needed
					if (maxColumnCount > 0) {
						// Columns are 0 based
						if (lastColumnIndex == -1) {
							lastColumnIndex = 0;
						}
						for (int i = lastColumnIndex; i < (this.maxColumnCount)-1; i++) {
//							rowDatas.append(this.fieldSplit);
//							line.addField(nullString);
							rowDatas.add(nullString);
							writer.write("|");
						}
					}
					
					// We're onto a new row
//					resultWriter.sendToWriter(line);
					ExcelReader.datas.add(rowDatas);
					
					writer.write("\r\n");
				}catch (Exception e){
					e.printStackTrace();
				}
				
				lastColumnIndex = -1;
			}
		}
		
		
		/* (non-Javadoc)
		 * @see net.bingosoft.ExcelTemplate.imports.handler.DefaultHandler#characters(char[], int, int)
		 */
		@Override
		public void characters(char[] ch, int start, int length)
				throws SAXException {
			if(vIsOpen){
				value.append(ch,start,length);
			}
		}
		

		/* (non-Javadoc)
		 * @see org.xml.sax.helpers.DefaultHandler#endDocument()
		 */
		@Override
		public void endDocument() throws SAXException {
//			resultWriter.flush();
			ExcelReader.isReturn = true;
			
			System.out.println("End Excel Document .. "+Thread.currentThread().getName());
			
			try {
				this.writer.flush();
				this.writer.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		/**
		 * <p>获取指定sheet的数据流</p>
		 * @param sheetId
		 * @return
		 */
		private InputStream getOneSheetStream(String sheetId){
			InputStream in = null;
			try {
				in = xssfReader.getSheet("rId"+sheetId);
			} catch (Exception e) {
				e.printStackTrace();
			} 
			return in;
		}
		
		/**
		 * <p>以xml的格式解析Excel数据</p>
		 * @param sheetSource
		 * @param tables
		 * @param table
		 * @throws Exception
		 */
		private void parseXmlContent(InputSource sheetSource) throws Exception {
			XMLReader xmlReader = null;
			try {
				System.out.println("create XML reader.");
				xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
			} catch (Exception e) {
				e.printStackTrace();
			}
			
			xmlReader.setContentHandler(this);
			
			xmlReader.parse(sheetSource);
		}
		
		/**
		 * 从列名转换为列索引
		 * @param name
		 * @return
		 */
		private int nameToColumn(String name) {
	        int column = -1;
	        for (int i = 0; i < name.length(); ++i) {
	            int c = name.charAt(i);
	            column = (column + 1) * 26 + c - 'A';
	        }
	        return column;
	    }
		
		/**
		 * 抽取单元格数据
		 * @param value
		 * @param nextDataType
		 * @return
		 */
		private Object extractCellValue(StringBuffer value,XSSFDataType nextDataType){
			Object obj = "";
			switch(nextDataType) {
			case BOOL : 
				char first = value.charAt(0);
				obj = first=='0'?false:true;
				break;
			case ERROR : 
				obj = value.toString().trim();
				break;
			case FORMULA : 
				obj = value.toString().trim();
				break;
			case INLINESTR : 
				XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
				obj = rtsi.toString().trim();
				break;
			case SSTINDEX : 
				String sstIndex = value.toString().trim();
				try {
					int idx = Integer.parseInt(sstIndex);
					XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));
					obj = rtss.toString().trim();
				}catch (NumberFormatException ex) {
					ex.printStackTrace();
				}
				break;
			case NUMBER :
				String n = value.toString().trim();
				if (this.formatString != null) {
					obj = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
				}else{
					obj = n;
				}
				break;
			default:
				obj = "";
				break;
			}
			
			return obj;
		}
	}
}

 

3
0
分享到:
评论
11 楼 kjkhi 2014-09-16  
dxb350352 写道
怎么用啊,运行完了也不知道取值的方法

结果是输出到文本的,详情看55行代码
10 楼 dxb350352 2014-08-20  
怎么用啊,运行完了也不知道取值的方法
9 楼 kjkhi 2013-07-15  
peng3510090 写道
找不到ExcelReader这个类,这个要如何呢

把那行代码去掉吧
8 楼 peng3510090 2013-07-04  
找不到ExcelReader这个类,这个要如何呢
7 楼 史汉发 2013-03-26  
能否加下QQ 729656843,想请教你一些东西
6 楼 史汉发 2013-03-24  
kjkhi 写道

是不是ClassNotFound啊

你这里是不是少了ExcelReader类啊? ExcelReader cannot be resolved
5 楼 kjkhi 2013-03-22  
史汉发 写道
kjkhi 写道
史汉发 写道
程序不能直接运行啊

是不是ClassNotFound啊

StringUtils 和ExcelReader 找不到,我已经引入了poijar包和commons-lang3-3.1.jar

另外我之前写的读取excel程序,当有3万多行时poi就会内存溢出,错误如下:" java.lang.OutOfMemoryError: Java heap space
at org.apache.poi.hssf.usermodel.HSSFCell.<init>(HSSFCell.java:180)

如果使用usermodel的方式,如HSSFWorkbook这样的方式去读大量数据的Excel文档,很容易内存溢出的,建议使用eventusermodel的方式实现
4 楼 史汉发 2013-03-22  
kjkhi 写道
史汉发 写道
程序不能直接运行啊

是不是ClassNotFound啊

加下我QQ:729656843
3 楼 史汉发 2013-03-22  
kjkhi 写道
史汉发 写道
程序不能直接运行啊

是不是ClassNotFound啊

StringUtils 和ExcelReader 找不到,我已经引入了poijar包和commons-lang3-3.1.jar

另外我之前写的读取excel程序,当有3万多行时poi就会内存溢出,错误如下:" java.lang.OutOfMemoryError: Java heap space
at org.apache.poi.hssf.usermodel.HSSFCell.<init>(HSSFCell.java:180)
2 楼 kjkhi 2013-03-19  
史汉发 写道
程序不能直接运行啊

是不是ClassNotFound啊
1 楼 史汉发 2013-03-19  
程序不能直接运行啊

相关推荐

Global site tag (gtag.js) - Google Analytics