001    /*
002     * Copyright (C) 2008-2010 by Holger Arndt
003     *
004     * This file is part of the Universal Java Matrix Package (UJMP).
005     * See the NOTICE file distributed with this work for additional
006     * information regarding copyright ownership and licensing.
007     *
008     * UJMP is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU Lesser General Public License as
010     * published by the Free Software Foundation; either version 2
011     * of the License, or (at your option) any later version.
012     *
013     * UJMP is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016     * GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public
019     * License along with UJMP; if not, write to the
020     * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
021     * Boston, MA  02110-1301  USA
022     */
023    
024    package org.ujmp.core.stringmatrix.calculation;
025    
026    import java.util.HashSet;
027    import java.util.Set;
028    
029    import org.ujmp.core.Matrix;
030    import org.ujmp.core.exceptions.MatrixException;
031    import org.ujmp.core.util.StringUtil;
032    
033    public class RemovePunctuation extends AbstractStringCalculation {
034            private static final long serialVersionUID = 4734721685667215634L;
035    
036            public static Set<Character> allowedChars = null;
037    
038            static {
039                    allowedChars = new HashSet<Character>();
040                    allowedChars.add(' ');
041                    for (char c = 'a'; c <= 'z'; c++) {
042                            allowedChars.add(c);
043                    }
044                    for (char c = 'A'; c <= 'Z'; c++) {
045                            allowedChars.add(c);
046                    }
047            }
048    
049            public RemovePunctuation(Matrix m) {
050                    super(m);
051            }
052    
053            public String getString(long... coordinates) throws MatrixException {
054                    String s = getSource().getAsString(coordinates);
055                    s = StringUtil.retainChars(s, allowedChars, ' ');
056                    s = s.replaceAll("\\s+", " ");
057                    return s;
058            }
059    
060    }