1.目前測試了20M的文件,可以讀取。
2.支持單個工作表1萬+的數據行數,耗時如圖。
3.以下是關鍵地方處理的代碼
1 //Accepts objects needed while parsing. 2 // @param styles Table of styles 3 // @param strings Table of shared strings 4 // @param cols Minimum number of columns to show 5 // @param target Sink for output 6 public MyXSSFSheetHandler( 7 StylesTable styles, 8 ReadOnlySharedStringsTable strings, 9 int cols, 10 PrintStream target) { 11 this.stylesTable = styles; 12 this.sharedStringsTable = strings; 13 this.minColumnCount = cols; 14 this.output = target; 15 this.value = new StringBuffer(); 16 this.nextDataType = xssfDataType.NUMBER; 17 this.formatter = new DataFormatter(); 18 rowlist = new ArrayList<String>(0); 19 rowReader = new RowReader(); 20 rowMap = new HashMap<Integer, String>(0); 21 rowString = new StringBuffer(); 22 } 23 // @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) 24 public void startElement(String uri, String localName, String name, 25 Attributes attributes) throws SAXException 26 { 27 28 29 if ("inlineStr".equals(name) || "v".equals(name)) 30 { 31 vIsOpen = true; 32 // Clear contents cache 33 value.setLength(0); 34 } 35 // c => cell 36 else if ("c".equals(name)) 37 { 38 // Get the cell reference 39 String r = attributes.getValue("r"); 40 int firstDigit = -1; 41 for (int c = 0; c < r.length(); ++c) 42 { 43 if (Character.isDigit(r.charAt(c))) 44 { 45 firstDigit = c; 46 break; 47 } 48 } 49 thisColumn = nameToColumn(r.substring(0, firstDigit)); 50 51 // Set up defaults. 52 this.nextDataType = xssfDataType.NUMBER; 53 this.formatIndex = -1; 54 this.formatString = null; 55 String cellType = attributes.getValue("t"); 56 String cellStyleStr = attributes.getValue("s"); 57 if ("b".equals(cellType)) 58 nextDataType = xssfDataType.BOOL; 59 else if ("e".equals(cellType)) 60 nextDataType = xssfDataType.ERROR; 61 else if ("inlineStr".equals(cellType)) 62 nextDataType = xssfDataType.INLINESTR; 63 else if ("s".equals(cellType)) 64 nextDataType = xssfDataType.SSTINDEX; 65 else if ("str".equals(cellType)) 66 nextDataType = xssfDataType.FORMULA; 67 else if (cellStyleStr != null) { 68 // It's a number, but almost certainly one 69 // with a special style or format 70 int styleIndex = Integer.parseInt(cellStyleStr); 71 XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); 72 this.formatIndex = style.getDataFormat(); 73 this.formatString = style.getDataFormatString(); 74 if (this.formatString == null) 75 this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); 76 } 77 } 78 79 } 80 81 82 // @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 83 public void endElement(String uri, String localName, String name) 84 throws SAXException 85 { 86 87 String thisStr = null; 88 89 // v => contents of a cell 90 if ("v".equals(name)) 91 { 92 // Process the value contents as required. 93 // Do now, as characters() may be called more than once 94 switch (nextDataType) { 95 96 case BOOL: 97 char first = value.charAt(0); 98 thisStr = first == '0' ? "FALSE" : "TRUE"; 99 break; 100 101 case ERROR: 102 thisStr = "\"ERROR:" + value.toString() + '"'; 103 break; 104 105 case FORMULA: 106 // A formula could result in a string value, 107 // so always add double-quote characters. 108 thisStr = '"' + value.toString() + '"'; 109 break; 110 111 case INLINESTR: 112 // TODO: have seen an example of this, so it's untested. 113 XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); 114 thisStr = '"' + rtsi.toString() + '"'; 115 break; 116 117 case SSTINDEX: 118 String sstIndex = value.toString(); 119 try { 120 int idx = Integer.parseInt(sstIndex); 121 XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)); 122 thisStr = '"' + rtss.toString() + '"'; 123 } catch (NumberFormatException ex) { 124 output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString()); 125 } 126 break; 127 128 case NUMBER: 129 String n = value.toString(); 130 if (this.formatString != null) 131 thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); 132 else 133 thisStr = n; 134 break; 135 136 default: 137 thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; 138 break; 139 } 140 141 // Output after we've seen the string contents 142 // Emit commas for any fields that were missing on this row 143 if (lastColumnNumber == -1) 144 { 145 lastColumnNumber = 0; 146 } 147 for (int i = lastColumnNumber; i < thisColumn; ++i) 148 { 149 rowString.append(',');//每天加一個單元格的值到字符串中就追加一個逗號(末尾不添加) 150 //output.print(','); 可以看到使用output是可以將每一個單元格使用逗號分割
//但是如果使用rowlist添加到列表中,卻始終無法得到空單元格的內容
//也就是說:空單元格被忽略了。
//具體請參照標紅的地方進行處理:使用字符串拼接的方式獲得完整的行數據,再使用逗號拆分組合成rowMap
151 } 152 rowString.append(thisStr);// 這條code放在for後面,如果放在前面,會導致0和1兩個單元格合為一個單元格。 153 // Might be the empty string. 154 //output.print(thisStr); 155 rowlist.add(thisStr); 156 // Update column 157 if (thisColumn > -1) 158 { 159 lastColumnNumber = thisColumn; 160 } 161 rowIndex++; 162 } 163 else if ("row".equals(name)) 164 { 165 166 // Print out any missing commas if needed 167 if (minColumns > 0) 168 { 169 // Columns are 0 based 170 if (lastColumnNumber == -1) 171 { 172 lastColumnNumber = 0; 173 } 174 for (int i = lastColumnNumber; i < (this.minColumnCount); i++) 175 { 176 output.print(','); 177 } 178 } 179 180 // We're onto a new row 181 182 output.println(); 183 output.println(countrows++); 184 lastColumnNumber = -1; 185 rowIndex = 0; 186 //rowMap = rowReader.getRowMap(rowlist); 187 rowMap = rowReader.getRowMapByString(rowString.toString()); 188 // ADD = 189 rowLst1000.add(rowMap); 190 rowMap = null; 191 rowMap = new HashMap<Integer, String>(0); 192 if (countrows % 1000 == 0) 193 { 194 rowLst1000n.add(rowLst1000); 195 rowLst1000 = null; 196 rowLst1000 = new ArrayList<Map<Integer, String>>(0); 197 } 198 rowlist.clear(); 199 System.out.println(rowString.toString()); 200 rowString = null; 201 rowString = new StringBuffer(); 202 } 203 }
以上是我自己的處理方式,當然還有其他的處理方式,再研究吧。畢竟寫到此處的時候,我不過是一個不到1年經驗的小菜鳥。
下面附上其余代碼的參照地址:
http://www.360sdn.com/java/2014/0524/3392.html
【做而會】