001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.core.stringmatrix.impl;
025    
026    import java.io.File;
027    import java.io.IOException;
028    import java.util.HashMap;
029    import java.util.Iterator;
030    import java.util.LinkedList;
031    import java.util.List;
032    import java.util.Map;
033    
034    import org.ujmp.core.collections.SoftHashMap;
035    import org.ujmp.core.exceptions.MatrixException;
036    import org.ujmp.core.stringmatrix.stub.AbstractSparseStringMatrix2D;
037    import org.ujmp.core.util.io.IntelligentFileReader;
038    import org.ujmp.core.util.io.SeekableLineInputStream;
039    
040    public class SparseCSVMatrix extends AbstractSparseStringMatrix2D {
041            private static final long serialVersionUID = 3021406834325366430L;
042    
043            private String fieldDelimiter = "\t";
044    
045            private long[] size = null;
046    
047            private SeekableLineInputStream sli = null;
048    
049            private final Map<Long, Object[]> data = new SoftHashMap<Long, Object[]>();
050    
051            private final Map<Long, List<Long>> rowToLine = new HashMap<Long, List<Long>>();
052    
053            public SparseCSVMatrix(String file, Object... parameters) throws IOException {
054                    this(new File(file), parameters);
055            }
056    
057            public SparseCSVMatrix(File file, Object... parameters) throws IOException {
058                    if (parameters.length != 0 && parameters[0] instanceof String) {
059                            this.fieldDelimiter = (String) parameters[0];
060                    }
061    
062                    long rows = 0;
063                    long cols = 0;
064                    long lastRow = -1;
065    
066                    IntelligentFileReader lr = new IntelligentFileReader(file);
067    
068                    System.out.print("determining matrix size.");
069                    long i = 0;
070                    String line = null;
071                    while ((line = lr.readLine()) != null) {
072                            if (i % 100000 == 0) {
073                                    System.out.print(".");
074                            }
075                            String[] fields = line.split(fieldDelimiter);
076                            long row = Long.parseLong(fields[0]);
077                            long col = Long.parseLong(fields[1]);
078                            if (row > rows) {
079                                    rows = row;
080                            }
081                            if (col > cols) {
082                                    cols = col;
083                            }
084    
085                            if (lastRow != row) {
086                                    lastRow = row;
087                                    List<Long> list = rowToLine.get(row);
088                                    if (list == null) {
089                                            list = new LinkedList<Long>();
090                                            rowToLine.put(row, list);
091                                    }
092                                    list.add(i);
093                            }
094                            i++;
095                    }
096                    lr.close();
097                    size = new long[] { rows, cols };
098                    System.out.println("ok");
099                    sli = new SeekableLineInputStream(file);
100    
101            }
102    
103            public long[] getSize() {
104                    return size;
105            }
106    
107            public String getString(long row, long column) throws MatrixException {
108                    try {
109                            List<Long> linesToCheck = rowToLine.get(row);
110                            if (linesToCheck == null) {
111                                    return null;
112                            }
113                            for (long startLine : linesToCheck) {
114                                    for (long l = startLine; l < sli.getLineCount(); l++) {
115                                            Object objects[] = data.get(l);
116                                            if (objects == null) {
117                                                    String line = sli.readLine((int) l);
118                                                    String[] strings = line.split(fieldDelimiter);
119                                                    long foundRow = Long.parseLong(strings[0]);
120                                                    long foundColumn = Long.parseLong(strings[1]);
121                                                    objects = new Object[] { foundRow, foundColumn, strings[2] };
122                                                    data.put(l, objects);
123                                            }
124    
125                                            if ((Long) objects[0] != row) {
126                                                    break;
127                                            }
128    
129                                            if ((Long) objects[1] == column) {
130                                                    return (String) objects[2];
131                                            }
132                                    }
133                            }
134                    } catch (Exception e) {
135                            throw new MatrixException(e);
136                    }
137                    return null;
138            }
139    
140            
141            public Iterable<long[]> availableCoordinates() {
142                    return new Iterable<long[]>() {
143    
144                            
145                            public Iterator<long[]> iterator() {
146                                    return new SparseCSVMatrixIterator();
147                            }
148                    };
149            }
150    
151            public void setString(String value, long row, long column) {
152            }
153    
154            
155            public boolean contains(long... coordinates) throws MatrixException {
156                    return getString(coordinates) != null;
157            }
158    
159            class SparseCSVMatrixIterator implements Iterator<long[]> {
160    
161                    long l = 0;
162    
163                    
164                    public boolean hasNext() {
165                            return l < sli.getLineCount();
166                    }
167    
168                    
169                    public long[] next() {
170                            try {
171                                    Object[] objects = data.get(l);
172                                    if (objects == null) {
173                                            String line = sli.readLine((int) l);
174                                            String[] strings = line.split(fieldDelimiter);
175                                            long row = Long.parseLong(strings[0]);
176                                            long col = Long.parseLong(strings[1]);
177                                            objects = new Object[] { row, col, strings[2] };
178                                            data.put(l, objects);
179                                    }
180    
181                                    long[] c = new long[] { (Long) objects[0], (Long) objects[1] };
182                                    l++;
183                                    return c;
184                            } catch (Exception e) {
185                                    throw new MatrixException(e);
186                            }
187                    }
188    
189                    
190                    public void remove() {
191                    }
192    
193            }
194    
195    }