001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.core.stringmatrix.calculation;
025    
026    import org.ujmp.core.Matrix;
027    import org.ujmp.core.exceptions.MatrixException;
028    import org.ujmp.core.util.PorterStemmer;
029    
030    public class Stem extends AbstractStringCalculation {
031            private static final long serialVersionUID = 1221910899287177556L;
032    
033            private PorterStemmer stemmer = new PorterStemmer();
034    
035            public Stem(Matrix m) {
036                    super(m);
037            }
038    
039            
040            public String getString(long... coordinates) throws MatrixException {
041                    String s = getSource().getAsString(coordinates).toLowerCase();
042                    StringBuilder result = new StringBuilder(s.length());
043                    String[] words = s.split("\\s+");
044                    for (int i = 0; i < words.length; i++) {
045                            String w = words[i];
046                            if (w.length() == 0) {
047                                    continue;
048                            }
049                            result.append(stemmer.stem(w));
050                            if (i < words.length - 1) {
051                                    result.append(" ");
052                            }
053                    }
054                    return result.toString();
055            }
056    
057    }