001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.core.stringmatrix.impl;
025    
026    import java.io.File;
027    import java.io.IOException;
028    import java.util.Map;
029    
030    import org.ujmp.core.collections.SoftHashMap;
031    import org.ujmp.core.exceptions.MatrixException;
032    import org.ujmp.core.stringmatrix.stub.AbstractDenseStringMatrix2D;
033    import org.ujmp.core.util.MathUtil;
034    import org.ujmp.core.util.io.SeekableLineInputStream;
035    
036    public class CSVMatrix extends AbstractDenseStringMatrix2D {
037            private static final long serialVersionUID = 6025235663309962730L;
038    
039            private String fieldDelimiter = "[,;\t]";
040    
041            private int columnCount = 0;
042    
043            private final boolean trimFields = true;
044    
045            private final boolean ignoreQuotationMarks = true;
046    
047            private final String quotation = "\"";
048    
049            private SeekableLineInputStream sli = null;
050    
051            private final Map<Long, String[]> rows = new SoftHashMap<Long, String[]>();
052    
053            public CSVMatrix(String file, Object... parameters) throws IOException {
054                    this(new File(file), parameters);
055            }
056    
057            public CSVMatrix(File file, Object... parameters) throws IOException {
058                    if (parameters.length != 0 && parameters[0] instanceof String) {
059                            this.fieldDelimiter = (String) parameters[0];
060                    } else {
061                            System.out
062                                            .println("You should specify the column separator to make sure that the file is parsed correctly.");
063                            System.out.println("Example: MatrixFactory.linkToFile(FileFormat.CSV, file, \";\")");
064                    }
065    
066                    sli = new SeekableLineInputStream(file);
067    
068                    // check 100 random lines to find maximum number of columns
069                    for (int i = 0; i < 100; i++) {
070                            String line = sli.readLine(MathUtil.nextInteger(0, sli.getLineCount() - 1));
071                            int c = line.split(fieldDelimiter).length;
072                            if (c > columnCount) {
073                                    columnCount = c;
074                            }
075                    }
076    
077                    setLabel(file.getAbsolutePath());
078            }
079    
080            public long[] getSize() {
081                    return new long[] { sli.getLineCount(), columnCount };
082            }
083    
084            public String getString(long row, long column) throws MatrixException {
085                    try {
086                            String fields[] = null;
087                            fields = rows.get(row);
088                            if (fields == null) {
089                                    String line = sli.readLine((int) row);
090                                    fields = line.split(fieldDelimiter);
091                                    if (trimFields) {
092                                            for (int i = 0; i < fields.length; i++) {
093                                                    fields[i] = fields[i].trim();
094                                            }
095                                    }
096                                    if (ignoreQuotationMarks) {
097                                            for (int i = 0; i < fields.length; i++) {
098                                                    String s = fields[i];
099                                                    if (s.length() > 1 && s.startsWith(quotation) && s.endsWith(quotation)) {
100                                                            fields[i] = s.substring(1, s.length() - 2);
101                                                    }
102                                            }
103                                    }
104                                    rows.put(row, fields);
105                            }
106                            if (fields.length > columnCount) {
107                                    columnCount = fields.length;
108                            }
109                            if (column < fields.length) {
110                                    return fields[(int) column];
111                            }
112                    } catch (Exception e) {
113                            throw new MatrixException(e);
114                    }
115                    return null;
116            }
117    
118            public void setString(String value, long row, long column) {
119            }
120    }