001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.lucene;
025    
026    import java.io.ByteArrayInputStream;
027    import java.io.ByteArrayOutputStream;
028    import java.io.Closeable;
029    import java.io.File;
030    import java.io.Flushable;
031    import java.io.IOException;
032    import java.io.ObjectInputStream;
033    import java.io.ObjectOutputStream;
034    import java.io.Serializable;
035    import java.util.HashSet;
036    import java.util.Set;
037    
038    import org.apache.lucene.analysis.Analyzer;
039    import org.apache.lucene.analysis.standard.StandardAnalyzer;
040    import org.apache.lucene.document.Document;
041    import org.apache.lucene.document.Field;
042    import org.apache.lucene.index.CorruptIndexException;
043    import org.apache.lucene.index.IndexReader;
044    import org.apache.lucene.index.IndexWriter;
045    import org.apache.lucene.index.Term;
046    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
047    import org.apache.lucene.search.IndexSearcher;
048    import org.apache.lucene.search.ScoreDoc;
049    import org.apache.lucene.search.TermQuery;
050    import org.apache.lucene.search.TopDocs;
051    import org.apache.lucene.search.WildcardQuery;
052    import org.apache.lucene.store.Directory;
053    import org.apache.lucene.store.FSDirectory;
054    import org.apache.lucene.util.Version;
055    import org.ujmp.core.collections.AbstractMap;
056    import org.ujmp.core.exceptions.MatrixException;
057    import org.ujmp.core.interfaces.Erasable;
058    import org.ujmp.core.util.StringUtil;
059    import org.ujmp.core.util.io.FileUtil;
060    
061    public class LuceneMap<K, V> extends AbstractMap<K, V> implements Flushable,
062                    Closeable, Erasable {
063            private static final long serialVersionUID = 8998898900190996038L;
064    
065            private static final String KEYSTRING = "KS";
066    
067            private static final String KEYDATA = "KD";
068    
069            private static final String VALUESTRING = "VS";
070    
071            private static final String VALUEDATA = "VD";
072    
073            private transient Directory directory = null;
074    
075            private transient IndexWriter indexWriter = null;
076    
077            private transient IndexSearcher indexSearcher = null;
078    
079            private static final int MAXSIZE = 1000000;
080    
081            private static final int AUTOFLUSHCOUNT = 100000;
082    
083            private boolean readOnly = false;
084    
085            private transient File path = null;
086    
087            private transient Analyzer analyzer = null;
088    
089            private int count = 0;
090    
091            public LuceneMap() throws IOException {
092                    this(null, false);
093            }
094    
095            public LuceneMap(File dir) throws IOException {
096                    this(dir, false);
097            }
098    
099            public LuceneMap(File path, boolean readOnly) throws IOException {
100                    this.readOnly = readOnly;
101                    this.path = path;
102            }
103    
104            public Directory getDirectory() throws IOException {
105                    if (directory == null) {
106                            directory = FSDirectory.open(getPath());
107                    }
108                    return directory;
109            }
110    
111            public File getPath() throws IOException {
112                    if (path == null) {
113                            path = File.createTempFile("lucene", "");
114                            path.delete();
115                            path.mkdir();
116                    }
117                    return path;
118            }
119    
120            public synchronized void optimize() throws CorruptIndexException,
121                            IOException {
122                    getIndexWriter().optimize();
123            }
124    
125            public synchronized void clear() {
126                    try {
127                            getIndexWriter().deleteAll();
128                    } catch (Exception e) {
129                            throw new MatrixException("cannot clear index", e);
130                    }
131            }
132    
133            public synchronized boolean containsKey(Object key) {
134                    try {
135                            Term term = new Term(KEYSTRING, getUniqueString(key));
136                            return getIndexSearcher().docFreq(term) > 0;
137                    } catch (Exception e) {
138                            throw new MatrixException("could not search documents: " + key, e);
139                    }
140            }
141    
142            public synchronized boolean containsValue(Object value) {
143                    try {
144                            Term term = new Term(VALUESTRING, getUniqueString(value));
145                            return getIndexSearcher().docFreq(term) > 0;
146                    } catch (Exception e) {
147                            throw new MatrixException("could not search documents: " + value, e);
148                    }
149            }
150    
151            public synchronized V get(Object key) {
152                    try {
153                            Term term = new Term(KEYSTRING, getUniqueString(key));
154                            TermQuery query = new TermQuery(term);
155                            TopDocs docs = getIndexSearcher().search(query, 1);
156                            if (docs.totalHits > 0) {
157                                    ScoreDoc match = docs.scoreDocs[0];
158                                    Document doc = getIndexSearcher().doc(match.doc);
159                                    return getObjectFromBytes(doc.getBinaryValue(VALUEDATA));
160                            }
161                    } catch (Exception e) {
162                            throw new MatrixException("could not search documents: " + key, e);
163                    }
164                    return null;
165            }
166    
167            @SuppressWarnings("unchecked")
168            private V getObjectFromBytes(byte[] bytes) {
169                    try {
170                            ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
171                            ObjectInputStream ois = new ObjectInputStream(bis);
172                            Object o = ois.readObject();
173                            ois.close();
174                            bis.close();
175                            return (V) o;
176                    } catch (Exception e) {
177                            throw new MatrixException("could not convert to object", e);
178                    }
179            }
180    
181            @SuppressWarnings("unchecked")
182            public synchronized Set<K> keySet() {
183                    Set<K> set = new HashSet<K>();
184                    if (isEmpty()) {
185                            return set;
186                    }
187                    try {
188                            Term term = new Term(KEYSTRING, "*");
189                            WildcardQuery query = new WildcardQuery(term);
190                            TopDocs docs = getIndexSearcher().search(query, MAXSIZE);
191    
192                            for (ScoreDoc sd : docs.scoreDocs) {
193                                    Document d = getIndexSearcher().doc(sd.doc);
194                                    set.add((K) getObjectFromBytes(d.getBinaryValue(KEYDATA)));
195                            }
196                            return set;
197                    } catch (Exception e) {
198                            throw new MatrixException("could not search documents", e);
199                    }
200            }
201    
202            private static String getUniqueString(Object o) throws IOException {
203                    if (o == null) {
204                            return "";
205                    } else if (o instanceof String) {
206                            return (String) o;
207                    } else {
208                            return StringUtil.encodeToHex((Serializable) o);
209                    }
210            }
211    
212            public synchronized V put(K key, V value) {
213                    try {
214                            Term term = new Term(KEYSTRING, getUniqueString(key));
215                            Document doc = new Document();
216                            doc.add(new Field(KEYSTRING, getUniqueString(key), Field.Store.YES,
217                                            Field.Index.NOT_ANALYZED));
218                            doc.add(new Field(KEYDATA, getBytes(key), Field.Store.YES));
219                            doc.add(new Field(VALUESTRING, getUniqueString(value),
220                                            Field.Store.YES, Field.Index.NOT_ANALYZED));
221                            doc.add(new Field(VALUEDATA, getBytes(value), Field.Store.YES));
222                            getIndexWriter().updateDocument(term, doc);
223    
224                            // auto flush from time to time
225                            if (++count % AUTOFLUSHCOUNT == 0) {
226                                    flush();
227                            }
228                            return null;
229                    } catch (Exception e) {
230                            throw new MatrixException("could not add document: " + key, e);
231                    }
232            }
233    
234            public synchronized V remove(Object key) {
235                    try {
236                            Term term = new Term(KEYSTRING, getUniqueString(key));
237                            getIndexWriter().deleteDocuments(term);
238                            // auto flush from time to time
239                            if (++count % AUTOFLUSHCOUNT == 0) {
240                                    flush();
241                            }
242                            return null;
243                    } catch (Exception e) {
244                            throw new MatrixException("could not delete document: " + key, e);
245                    }
246            }
247    
248            public Analyzer getAnalyzer() {
249                    if (analyzer == null) {
250                            analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
251                    }
252                    return analyzer;
253            }
254    
255            public void setAnalyzer(Analyzer analyzer) {
256                    this.analyzer = analyzer;
257            }
258    
259            public synchronized int size() {
260                    try {
261                            flush();
262                            if (indexSearcher != null
263                                            && indexSearcher.getIndexReader().isCurrent()) {
264                                    return indexSearcher.getIndexReader().numDocs();
265                            } else {
266                                    int size = getIndexWriter().numDocs();
267                                    return size;
268                            }
269                    } catch (Exception e) {
270                            throw new MatrixException("could not count documents", e);
271                    }
272            }
273    
274            public synchronized void flush() throws IOException {
275                    IndexWriter iw = getIndexWriter();
276                    iw.expungeDeletes(true);
277                    iw.commit();
278                    iw.close(true);
279                    indexWriter = null;
280    
281            }
282    
283            public synchronized void close() throws IOException {
284                    if (indexSearcher != null) {
285                            indexSearcher.close();
286                            indexSearcher = null;
287                    }
288                    if (indexWriter != null) {
289                            indexWriter.close(true);
290                            indexWriter = null;
291                    }
292    
293            }
294    
295            private synchronized IndexWriter getIndexWriter() {
296                    try {
297                            if (!readOnly && indexSearcher != null) {
298                                    indexSearcher.close();
299                                    indexSearcher = null;
300                            }
301                            if (indexWriter == null) {
302                                    if (IndexReader.indexExists(getDirectory())) {
303                                            if (!readOnly) {
304                                                    if (IndexWriter.isLocked(getDirectory())) {
305                                                            IndexWriter.unlock(getDirectory());
306                                                    }
307                                                    indexWriter = new IndexWriter(getDirectory(),
308                                                                    getAnalyzer(), MaxFieldLength.UNLIMITED);
309                                            }
310                                    } else {
311                                            if (!readOnly) {
312                                                    indexWriter = new IndexWriter(getDirectory(),
313                                                                    getAnalyzer(), true, MaxFieldLength.UNLIMITED);
314                                            }
315                                    }
316                            }
317                            return indexWriter;
318                    } catch (Exception e) {
319                            throw new MatrixException("could not prepare writher", e);
320                    }
321            }
322    
323            private synchronized IndexSearcher getIndexSearcher() {
324                    try {
325                            if (!IndexReader.indexExists(getDirectory())) {
326                                    getIndexWriter();
327                            }
328                            if (indexWriter != null) {
329                                    if (indexSearcher != null) {
330                                            indexSearcher.close();
331                                            indexSearcher = null;
332                                    }
333                                    indexWriter.commit();
334                                    indexWriter.waitForMerges();
335                                    indexWriter.expungeDeletes(true);
336                                    indexWriter.close();
337                                    indexWriter = null;
338                            }
339                            if (indexSearcher != null) {
340                                    if (!indexSearcher.getIndexReader().isCurrent()) {
341                                            indexSearcher.close();
342                                            indexSearcher = null;
343                                    }
344                            }
345                            if (indexSearcher == null) {
346                                    indexSearcher = new IndexSearcher(directory, true);
347                            }
348                            return indexSearcher;
349                    } catch (Exception e) {
350                            throw new MatrixException("could not prepare reader", e);
351                    }
352            }
353    
354            private byte[] getBytes(Object o) {
355                    try {
356                            ByteArrayOutputStream bao = new ByteArrayOutputStream();
357                            ObjectOutputStream oos = new ObjectOutputStream(bao);
358                            oos.writeObject(o);
359                            oos.close();
360                            bao.close();
361                            return bao.toByteArray();
362                    } catch (Exception e) {
363                            throw new MatrixException("could not convert to bytes: " + o, e);
364                    }
365            }
366    
367            // @SuppressWarnings("unchecked")
368            // private void readObject(ObjectInputStream s) throws IOException,
369            // ClassNotFoundException {
370            // s.defaultReadObject();
371            // while (true) {
372            // try {
373            // K k = (K) s.readObject();
374            // V v = (V) s.readObject();
375            // put(k, v);
376            // } catch (OptionalDataException e) {
377            // return;
378            // }
379            // }
380            // }
381            //
382            // private void writeObject(ObjectOutputStream s) throws IOException,
383            // MatrixException {
384            // s.defaultWriteObject();
385            // for (Object k : keySet()) {
386            // Object v = get(k);
387            // s.writeObject(k);
388            // s.writeObject(v);
389            // }
390            // }
391    
392            public synchronized void erase() throws IOException {
393                    close();
394                    FileUtil.deleteRecursive(path);
395            }
396    
397    }