001 /* 002 * Copyright (C) 2008-2010 by Holger Arndt 003 * 004 * This file is part of the Universal Java Matrix Package (UJMP). 005 * See the NOTICE file distributed with this work for additional 006 * information regarding copyright ownership and licensing. 007 * 008 * UJMP is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation; either version 2 011 * of the License, or (at your option) any later version. 012 * 013 * UJMP is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU Lesser General Public License for more details. 017 * 018 * You should have received a copy of the GNU Lesser General Public 019 * License along with UJMP; if not, write to the 020 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 021 * Boston, MA 02110-1301 USA 022 */ 023 024 package org.ujmp.core.util.io; 025 026 import java.io.File; 027 import java.io.IOException; 028 import java.io.InputStream; 029 import java.util.ArrayList; 030 import java.util.Arrays; 031 import java.util.List; 032 033 /** 034 * This stream searches all line end characters (hex 0A) in a file. For Linux 035 * line splitting is OK, for Windows the character hex 0D has to be eliminated 036 * with String.trim() 037 */ 038 public class SeekableLineInputStream extends InputStream { 039 040 private int bufferSize = 65536; 041 042 private BufferedRandomAccessFile in = null; 043 044 private final List<Long> lineEnds = new ArrayList<Long>(); 045 046 public SeekableLineInputStream(String file) throws IOException { 047 this(new File(file)); 048 } 049 050 public SeekableLineInputStream(File file) throws IOException { 051 in = new BufferedRandomAccessFile(file, "r", bufferSize); 052 long totalLength = in.length(); 053 long maxLength = 0; 054 long last = -1; 055 byte[] bytes = new byte[bufferSize]; 056 for (long pos = 0; pos < totalLength; pos += bufferSize) { 057 Arrays.fill(bytes, (byte) 0); 058 in.read(pos, bytes); 059 060 for (int i = 0; i < bufferSize; i++) { 061 byte b = bytes[i]; 062 if (b == 10) { 063 long length = pos + i - last; 064 if (length > maxLength) { 065 maxLength = length; 066 } 067 lineEnds.add(pos + i); 068 last = pos + i; 069 } 070 071 } 072 } 073 074 // remove last newline, if it the last byte in the file 075 lineEnds.remove(totalLength - 1); 076 077 System.out.println("This stream has " + getLineCount() + " lines"); 078 079 // if initial buffer size was too small, we have to increase it now 080 if (maxLength + 1 > bufferSize) { 081 bufferSize = (int) maxLength + 1; 082 in.close(); 083 in = new BufferedRandomAccessFile(file, "r", bufferSize); 084 } 085 } 086 087 088 public void close() throws IOException { 089 in.close(); 090 } 091 092 public int getLineCount() { 093 return lineEnds.size() + 1; 094 } 095 096 097 public int read() throws IOException { 098 return in.read(); 099 } 100 101 public String readLine(int lineNumber) throws IOException { 102 String line = null; 103 if (line == null) { 104 long start = 0; 105 if (lineNumber > 0) { 106 start = lineEnds.get(lineNumber - 1) + 1; 107 } 108 long end = 0; 109 if (lineNumber < getLineCount() - 1) { 110 end = lineEnds.get(lineNumber); 111 } else { 112 end = in.length(); 113 } 114 int length = (int) (end - start); 115 byte[] bytes = new byte[length]; 116 in.read(start, bytes); 117 118 // eliminate Windows line end 119 if (bytes[bytes.length - 1] == 13) { 120 line = new String(bytes, 0, bytes.length - 1); 121 } else { 122 line = new String(bytes); 123 } 124 } 125 return line; 126 } 127 }