001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.core.util.io;
025    
026    import java.io.File;
027    import java.io.IOException;
028    import java.io.InputStream;
029    import java.util.ArrayList;
030    import java.util.Arrays;
031    import java.util.List;
032    
033    /**
034     * This stream searches all line end characters (hex 0A) in a file. For Linux
035     * line splitting is OK, for Windows the character hex 0D has to be eliminated
036     * with String.trim()
037     */
038    public class SeekableLineInputStream extends InputStream {
039    
040            private int bufferSize = 65536;
041    
042            private BufferedRandomAccessFile in = null;
043    
044            private final List<Long> lineEnds = new ArrayList<Long>();
045    
046            public SeekableLineInputStream(String file) throws IOException {
047                    this(new File(file));
048            }
049    
050            public SeekableLineInputStream(File file) throws IOException {
051                    in = new BufferedRandomAccessFile(file, "r", bufferSize);
052                    long totalLength = in.length();
053                    long maxLength = 0;
054                    long last = -1;
055                    byte[] bytes = new byte[bufferSize];
056                    for (long pos = 0; pos < totalLength; pos += bufferSize) {
057                            Arrays.fill(bytes, (byte) 0);
058                            in.read(pos, bytes);
059    
060                            for (int i = 0; i < bufferSize; i++) {
061                                    byte b = bytes[i];
062                                    if (b == 10) {
063                                            long length = pos + i - last;
064                                            if (length > maxLength) {
065                                                    maxLength = length;
066                                            }
067                                            lineEnds.add(pos + i);
068                                            last = pos + i;
069                                    }
070    
071                            }
072                    }
073    
074                    // remove last newline, if it the last byte in the file
075                    lineEnds.remove(totalLength - 1);
076    
077                    System.out.println("This stream has " + getLineCount() + " lines");
078    
079                    // if initial buffer size was too small, we have to increase it now
080                    if (maxLength + 1 > bufferSize) {
081                            bufferSize = (int) maxLength + 1;
082                            in.close();
083                            in = new BufferedRandomAccessFile(file, "r", bufferSize);
084                    }
085            }
086    
087            
088            public void close() throws IOException {
089                    in.close();
090            }
091    
092            public int getLineCount() {
093                    return lineEnds.size() + 1;
094            }
095    
096            
097            public int read() throws IOException {
098                    return in.read();
099            }
100    
101            public String readLine(int lineNumber) throws IOException {
102                    String line = null;
103                    if (line == null) {
104                            long start = 0;
105                            if (lineNumber > 0) {
106                                    start = lineEnds.get(lineNumber - 1) + 1;
107                            }
108                            long end = 0;
109                            if (lineNumber < getLineCount() - 1) {
110                                    end = lineEnds.get(lineNumber);
111                            } else {
112                                    end = in.length();
113                            }
114                            int length = (int) (end - start);
115                            byte[] bytes = new byte[length];
116                            in.read(start, bytes);
117    
118                            // eliminate Windows line end
119                            if (bytes[bytes.length - 1] == 13) {
120                                    line = new String(bytes, 0, bytes.length - 1);
121                            } else {
122                                    line = new String(bytes);
123                            }
124                    }
125                    return line;
126            }
127    }