001 /* 002 * Copyright (C) 2008-2010 by Holger Arndt 003 * 004 * This file is part of the Universal Java Matrix Package (UJMP). 005 * See the NOTICE file distributed with this work for additional 006 * information regarding copyright ownership and licensing. 007 * 008 * UJMP is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation; either version 2 011 * of the License, or (at your option) any later version. 012 * 013 * UJMP is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU Lesser General Public License for more details. 017 * 018 * You should have received a copy of the GNU Lesser General Public 019 * License along with UJMP; if not, write to the 020 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 021 * Boston, MA 02110-1301 USA 022 */ 023 024 package org.ujmp.core.stringmatrix.impl; 025 026 import java.io.File; 027 import java.io.IOException; 028 import java.util.HashMap; 029 import java.util.Iterator; 030 import java.util.LinkedList; 031 import java.util.List; 032 import java.util.Map; 033 034 import org.ujmp.core.collections.SoftHashMap; 035 import org.ujmp.core.exceptions.MatrixException; 036 import org.ujmp.core.stringmatrix.stub.AbstractSparseStringMatrix2D; 037 import org.ujmp.core.util.io.IntelligentFileReader; 038 import org.ujmp.core.util.io.SeekableLineInputStream; 039 040 public class SparseCSVMatrix extends AbstractSparseStringMatrix2D { 041 private static final long serialVersionUID = 3021406834325366430L; 042 043 private String fieldDelimiter = "\t"; 044 045 private long[] size = null; 046 047 private SeekableLineInputStream sli = null; 048 049 private final Map<Long, Object[]> data = new SoftHashMap<Long, Object[]>(); 050 051 private final Map<Long, List<Long>> rowToLine = new HashMap<Long, List<Long>>(); 052 053 public SparseCSVMatrix(String file, Object... parameters) throws IOException { 054 this(new File(file), parameters); 055 } 056 057 public SparseCSVMatrix(File file, Object... parameters) throws IOException { 058 if (parameters.length != 0 && parameters[0] instanceof String) { 059 this.fieldDelimiter = (String) parameters[0]; 060 } 061 062 long rows = 0; 063 long cols = 0; 064 long lastRow = -1; 065 066 IntelligentFileReader lr = new IntelligentFileReader(file); 067 068 System.out.print("determining matrix size."); 069 long i = 0; 070 String line = null; 071 while ((line = lr.readLine()) != null) { 072 if (i % 100000 == 0) { 073 System.out.print("."); 074 } 075 String[] fields = line.split(fieldDelimiter); 076 long row = Long.parseLong(fields[0]); 077 long col = Long.parseLong(fields[1]); 078 if (row > rows) { 079 rows = row; 080 } 081 if (col > cols) { 082 cols = col; 083 } 084 085 if (lastRow != row) { 086 lastRow = row; 087 List<Long> list = rowToLine.get(row); 088 if (list == null) { 089 list = new LinkedList<Long>(); 090 rowToLine.put(row, list); 091 } 092 list.add(i); 093 } 094 i++; 095 } 096 lr.close(); 097 size = new long[] { rows, cols }; 098 System.out.println("ok"); 099 sli = new SeekableLineInputStream(file); 100 101 } 102 103 public long[] getSize() { 104 return size; 105 } 106 107 public String getString(long row, long column) throws MatrixException { 108 try { 109 List<Long> linesToCheck = rowToLine.get(row); 110 if (linesToCheck == null) { 111 return null; 112 } 113 for (long startLine : linesToCheck) { 114 for (long l = startLine; l < sli.getLineCount(); l++) { 115 Object objects[] = data.get(l); 116 if (objects == null) { 117 String line = sli.readLine((int) l); 118 String[] strings = line.split(fieldDelimiter); 119 long foundRow = Long.parseLong(strings[0]); 120 long foundColumn = Long.parseLong(strings[1]); 121 objects = new Object[] { foundRow, foundColumn, strings[2] }; 122 data.put(l, objects); 123 } 124 125 if ((Long) objects[0] != row) { 126 break; 127 } 128 129 if ((Long) objects[1] == column) { 130 return (String) objects[2]; 131 } 132 } 133 } 134 } catch (Exception e) { 135 throw new MatrixException(e); 136 } 137 return null; 138 } 139 140 141 public Iterable<long[]> availableCoordinates() { 142 return new Iterable<long[]>() { 143 144 145 public Iterator<long[]> iterator() { 146 return new SparseCSVMatrixIterator(); 147 } 148 }; 149 } 150 151 public void setString(String value, long row, long column) { 152 } 153 154 155 public boolean contains(long... coordinates) throws MatrixException { 156 return getString(coordinates) != null; 157 } 158 159 class SparseCSVMatrixIterator implements Iterator<long[]> { 160 161 long l = 0; 162 163 164 public boolean hasNext() { 165 return l < sli.getLineCount(); 166 } 167 168 169 public long[] next() { 170 try { 171 Object[] objects = data.get(l); 172 if (objects == null) { 173 String line = sli.readLine((int) l); 174 String[] strings = line.split(fieldDelimiter); 175 long row = Long.parseLong(strings[0]); 176 long col = Long.parseLong(strings[1]); 177 objects = new Object[] { row, col, strings[2] }; 178 data.put(l, objects); 179 } 180 181 long[] c = new long[] { (Long) objects[0], (Long) objects[1] }; 182 l++; 183 return c; 184 } catch (Exception e) { 185 throw new MatrixException(e); 186 } 187 } 188 189 190 public void remove() { 191 } 192 193 } 194 195 }