/*
 * Copyright (C) 2008-2010 by Holger Arndt
 *
 * This file is part of the Universal Java Matrix Package (UJMP).
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership and licensing.
 *
 * UJMP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * UJMP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with UJMP; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301 USA
 */

package org.ujmp.core.stringmatrix.calculation;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.ujmp.core.Matrix;
import org.ujmp.core.exceptions.MatrixException;
import org.ujmp.core.util.StringUtil;

/**
 * String calculation that filters each cell of the source matrix against
 * {@link #allowedChars} and then collapses every run of whitespace into a
 * single space.
 */
public class RemovePunctuation extends AbstractStringCalculation {
    private static final long serialVersionUID = 4734721685667215634L;

    /**
     * Pre-compiled form of the whitespace-run regex, so the expression is not
     * recompiled on every cell access.
     */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Characters that are passed to {@link StringUtil#retainChars} as the set
     * to keep: the space character and the ASCII letters {@code a-z} and
     * {@code A-Z}. The field is left public and non-final so that existing
     * code which reads or replaces it keeps working.
     */
    public static Set<Character> allowedChars = null;

    static {
        allowedChars = new HashSet<Character>();
        allowedChars.add(' ');
        for (char c = 'a'; c <= 'z'; c++) {
            allowedChars.add(c);
        }
        for (char c = 'A'; c <= 'Z'; c++) {
            allowedChars.add(c);
        }
    }

    /**
     * Creates the calculation on top of the given source matrix.
     *
     * @param m
     *            source matrix whose cells are read as strings
     */
    public RemovePunctuation(Matrix m) {
        super(m);
    }

    /**
     * Returns the filtered string for the cell at the given coordinates.
     *
     * @param coordinates
     *            position of the cell in the source matrix
     * @return the filtered string with whitespace runs collapsed to a single
     *         space (no trimming is applied), or {@code null} if the source
     *         cell is {@code null}
     * @throws MatrixException
     *             declared by the signature; presumably propagated from the
     *             source matrix access
     */
    public String getString(long... coordinates) throws MatrixException {
        String s = getSource().getAsString(coordinates);
        if (s == null) {
            // Nothing to filter; avoid dereferencing a null cell value below.
            return null;
        }
        // NOTE(review): retainChars is defined elsewhere; presumably it
        // substitutes ' ' for every character not contained in allowedChars.
        s = StringUtil.retainChars(s, allowedChars, ' ');
        return WHITESPACE_RUN.matcher(s).replaceAll(" ");
    }

}