/*
 * Copyright (C) 2008-2010 by Holger Arndt
 *
 * This file is part of the Universal Java Matrix Package (UJMP).
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership and licensing.
 *
 * UJMP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * UJMP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with UJMP; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301 USA
 */

package org.ujmp.core.stringmatrix.calculation;

import java.util.Collection;
import java.util.regex.Pattern;

import org.ujmp.core.Matrix;
import org.ujmp.core.calculation.Calculation.Ret;
import org.ujmp.core.exceptions.MatrixException;

/**
 * Text-oriented operations offered by a matrix. Every method returns a
 * {@link Matrix}; this interface only declares the operations, the actual
 * behavior is supplied by the implementing classes.
 */
public interface StringCalculations {

	/**
	 * Replaces matching text in every entry of the matrix.
	 * 
	 * @param returnType
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @param search
	 *            Regular expression to search for
	 * @param replacement
	 *            Replacement String
	 * @return matrix with modified entries
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix replaceRegex(Ret returnType, String search, String replacement)
			throws MatrixException;

	/**
	 * Replaces matching text in every entry of the matrix.
	 * 
	 * @param returnType
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @param search
	 *            Regular expression pattern to search for
	 * @param replacement
	 *            Replacement String
	 * @return matrix with modified entries
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix replaceRegex(Ret returnType, Pattern search, String replacement)
			throws MatrixException;

	/**
	 * NOTE(review): presumably converts the text of every entry to lower
	 * case; this is inferred from the method name only - confirm against the
	 * implementation.
	 * 
	 * @param returnType
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix lowerCase(Ret returnType) throws MatrixException;

	/**
	 * NOTE(review): presumably converts the text of every entry to upper
	 * case; this is inferred from the method name only - confirm against the
	 * implementation.
	 * 
	 * @param returnType
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix upperCase(Ret returnType) throws MatrixException;

	/**
	 * NOTE(review): the name suggests a TF-IDF (term frequency / inverse
	 * document frequency) weighting of the matrix; the layout expected of the
	 * input and the exact formula are not visible here - confirm against the
	 * implementation. Unlike the other methods of this interface, no
	 * {@link Ret} argument is accepted.
	 * 
	 * @param calculateTf
	 *            presumably enables the term-frequency part - TODO confirm
	 * @param calculateIdf
	 *            presumably enables the inverse-document-frequency part - TODO
	 *            confirm
	 * @param normalize
	 *            presumably requests normalization of the result - TODO confirm
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix tfIdf(boolean calculateTf, boolean calculateIdf, boolean normalize)
			throws MatrixException;

	/**
	 * NOTE(review): presumably strips punctuation characters from every
	 * entry; which characters count as punctuation is decided by the
	 * implementation - confirm there.
	 * 
	 * @param ret
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix removePunctuation(Ret ret) throws MatrixException;

	/**
	 * NOTE(review): presumably applies word stemming to every entry; the
	 * stemming algorithm and language are not visible here - confirm against
	 * the implementation.
	 * 
	 * @param ret
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix stem(Ret ret) throws MatrixException;

	/**
	 * NOTE(review): presumably removes the given words from every entry
	 * (e.g. stop-word filtering); matching rules such as case sensitivity
	 * are not visible here - confirm against the implementation.
	 * 
	 * @param ret
	 *            Select whether a new or a linked Matrix is returned, or if the
	 *            operation is performed on the original Matrix
	 * @param words
	 *            the words handed to the operation
	 * @return the resulting matrix
	 * @throws MatrixException
	 *             declared by the signature; the failure conditions are
	 *             defined by the implementation
	 */
	public Matrix removeWords(Ret ret, Collection<String> words) throws MatrixException;

}