/*
 * Copyright (C) 2008-2010 by Holger Arndt
 *
 * This file is part of the Universal Java Matrix Package (UJMP).
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership and licensing.
 *
 * UJMP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * UJMP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with UJMP; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301 USA
 */

package org.ujmp.core.stringmatrix.impl;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import org.ujmp.core.collections.SoftHashMap;
import org.ujmp.core.exceptions.MatrixException;
import org.ujmp.core.stringmatrix.stub.AbstractDenseStringMatrix2D;
import org.ujmp.core.util.MathUtil;
import org.ujmp.core.util.io.SeekableLineInputStream;

/**
 * Dense two-dimensional string matrix whose cells are read on demand from a
 * delimited text file. Each line of the file is one row; a row is split into
 * fields with {@link String#split(String)} using the configured delimiter
 * regular expression.
 *
 * <p>
 * Parsed rows are kept in a {@link SoftHashMap} so that repeated access to
 * the same row does not re-read the file (presumably the map may drop
 * entries under memory pressure - confirm against {@code SoftHashMap}).
 *
 * <p>
 * The matrix is effectively read-only: {@link #setString(String, long, long)}
 * does nothing.
 */
public class CSVMatrix extends AbstractDenseStringMatrix2D {
	private static final long serialVersionUID = 6025235663309962730L;

	/** Number of randomly chosen lines inspected to estimate the column count. */
	private static final int SAMPLE_LINES = 100;

	/** Regular expression used to split a line into fields. */
	private String fieldDelimiter = "[,;\t]";

	/**
	 * Largest number of fields seen so far; grows lazily while rows are read.
	 */
	private int columnCount = 0;

	/** When set, every field is trimmed after splitting. */
	private final boolean trimFields = true;

	/** When set, one pair of enclosing quotation marks is removed per field. */
	private final boolean ignoreQuotationMarks = true;

	/** The quotation mark that may enclose a field. */
	private final String quotation = "\"";

	/** Line-addressable view of the underlying file. */
	private SeekableLineInputStream sli = null;

	/** Cache of already parsed rows, keyed by row index. */
	private final Map<Long, String[]> rows = new SoftHashMap<Long, String[]>();

	/**
	 * Opens the file with the given path.
	 *
	 * @param file
	 *            path of the file to read
	 * @param parameters
	 *            optional; if the first element is a {@code String} it is
	 *            used as the field delimiter regular expression
	 * @throws IOException
	 *             if the file cannot be opened or read
	 */
	public CSVMatrix(String file, Object... parameters) throws IOException {
		this(new File(file), parameters);
	}

	/**
	 * Opens the given file and estimates the number of columns by splitting
	 * up to {@value #SAMPLE_LINES} randomly chosen lines.
	 *
	 * @param file
	 *            the file to read
	 * @param parameters
	 *            optional; if the first element is a {@code String} it is
	 *            used as the field delimiter regular expression, otherwise
	 *            the default {@code "[,;\t]"} is kept and a hint is printed
	 *            to standard output
	 * @throws IOException
	 *             if the file cannot be opened or read
	 */
	public CSVMatrix(File file, Object... parameters) throws IOException {
		// parameters may be null when a caller passes (Object[]) null
		if (parameters != null && parameters.length != 0 && parameters[0] instanceof String) {
			this.fieldDelimiter = (String) parameters[0];
		} else {
			System.out
					.println("You should specify the column separator to make sure that the file is parsed correctly.");
			System.out.println("Example: MatrixFactory.linkToFile(FileFormat.CSV, file, \";\")");
		}

		sli = new SeekableLineInputStream(file);

		// Check random lines to find the maximum number of columns. Skipped
		// for an empty file, where the upper bound of the random range
		// (line count - 1) would be negative.
		if (sli.getLineCount() > 0) {
			for (int i = 0; i < SAMPLE_LINES; i++) {
				String line = sli.readLine(MathUtil.nextInteger(0, sli.getLineCount() - 1));
				if (line == null) {
					continue;
				}
				int c = line.split(fieldDelimiter).length;
				if (c > columnCount) {
					columnCount = c;
				}
			}
		}

		setLabel(file.getAbsolutePath());
	}

	/**
	 * Returns the matrix dimensions.
	 *
	 * @return {@code { lineCount, columnCount }}; the column count is the
	 *         largest number of fields observed so far and may grow as more
	 *         rows are read
	 */
	public long[] getSize() {
		return new long[] { sli.getLineCount(), columnCount };
	}

	/**
	 * Returns the field at the given position, parsing and caching the row on
	 * first access.
	 *
	 * @param row
	 *            row index; narrowed to {@code int} before the line is read
	 * @param column
	 *            column index
	 * @return the field value, or {@code null} if the row has fewer fields
	 *         than {@code column + 1}
	 * @throws MatrixException
	 *             wrapping any exception raised while reading or parsing
	 */
	public String getString(long row, long column) throws MatrixException {
		try {
			String[] fields = rows.get(row);
			if (fields == null) {
				fields = parseLine(sli.readLine((int) row));
				rows.put(row, fields);
			}
			// the sampled estimate may have missed a wider row
			if (fields.length > columnCount) {
				columnCount = fields.length;
			}
			if (column < fields.length) {
				return fields[(int) column];
			}
		} catch (Exception e) {
			throw new MatrixException(e);
		}
		return null;
	}

	/**
	 * Does nothing; values are never written back to the file.
	 *
	 * @param value
	 *            ignored
	 * @param row
	 *            ignored
	 * @param column
	 *            ignored
	 */
	public void setString(String value, long row, long column) {
	}

	/**
	 * Splits one line into fields, then trims each field and strips one pair
	 * of enclosing quotation marks, as configured.
	 *
	 * NOTE(review): String.split(regex) discards trailing empty strings, so a
	 * line ending in delimiters yields fewer fields than it has columns -
	 * confirm whether that is intended.
	 */
	private String[] parseLine(String line) {
		String[] fields = line.split(fieldDelimiter);
		for (int i = 0; i < fields.length; i++) {
			String field = fields[i];
			if (trimFields) {
				field = field.trim();
			}
			if (ignoreQuotationMarks && field.length() > 1 && field.startsWith(quotation)
					&& field.endsWith(quotation)) {
				// drop exactly the first and the last character (the quotes);
				// the end index of substring is exclusive
				field = field.substring(1, field.length() - 1);
			}
			fields[i] = field;
		}
		return fields;
	}
}