package net.sansa_stack.ml.common.outliers.vandalismdetection.feature.extraction;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sansa_stack.ml.common.outliers.vandalismdetection.feature.Utils$;
import org.apache.commons.lang3.StringUtils;
import scala.Predef$;
import scala.Serializable;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;

/* compiled from: Word.scala */
@ScalaSignature(bytes = "\u0006\u0001\tEg!B8q\u0001\u0005\r\u0001bBA\f\u0001\u0011\u0005\u0011\u0011\u0004\u0005\b\u0003?\u0001A\u0011AA\u0011\u0011\u001d\tI\u0005\u0001C\u0001\u0003\u0017B\u0011\"!\u001b\u0001\u0005\u0004%\t!a\u001b\t\u0011\u00055\u0004\u0001)A\u0005\u0003gA\u0011\"a\u001c\u0001\u0005\u0004%\t!!\u001d\t\u0011\u0005M\u0004\u0001)A\u0005\u0003+Bq!!\u001e\u0001\t\u0003\t9\bC\u0005\u0002|\u0001\u0011\r\u0011\"\u0001\u0002l!A\u0011Q\u0010\u0001!\u0002\u0013\t\u0019\u0004C\u0005\u0002��\u0001\u0011\r\u0011\"\u0001\u0002r!A\u0011\u0011\u0011\u0001!\u0002\u0013\t)\u0006C\u0005\u0002\u0004\u0002\u0011\r\u0011\"\u0001\u0002\u0006\"A\u0011Q\u0012\u0001!\u0002\u0013\t9\tC\u0004\u0002\u0010\u0002!\t!!%\t\u000f\u0005m\u0005\u0001\"\u0001\u0002\u001e\"9\u0011\u0011\u0015\u0001\u0005\u0002\u0005\r\u0006\"CAT\u0001\t\u0007I\u0011AA9\u0011!\tI\u000b\u0001Q\u0001\n\u0005U\u0003\"CAV\u0001\t\u0007I\u0011AAC\u0011!\ti\u000b\u0001Q\u0001\n\u0005\u001d\u0005bBAX\u0001\u0011\u0005\u0011\u0011\u0017\u0005\n\u0003k\u0003!\u0019!C\u0001\u0003cB\u0001\"a.\u0001A\u0003%\u0011Q\u000b\u0005\n\u0003s\u0003!\u0019!C\u0001\u0003\u000bC\u0001\"a/\u0001A\u0003%\u0011q\u0011\u0005\b\u0003{\u0003A\u0011AA`\u0011%\ty\r\u0001b\u0001\n\u0003\t\t\u000e\u0003\u0005\u0002V\u0002\u0001\u000b\u0011BAj\u0011%\t9\u000e\u0001b\u0001\n\u0003\tI\u000e\u0003\u0005\u0002d\u0002\u0001\u000b\u0011BAn\u0011%\t)\u000f\u0001b\u0001\n\u0003\tY\u0007\u0003\u0005\u0002h\u0002\u0001\u000b\u0011BA\u001a\u0011%\tI\u000f\u0001b\u0001\n\u0003\t\t\b\u0003\u0005\u0002l\u0002\u0001\u000b\u0011BA+\u0011\u001d\ti\u000f\u0001C\u0001\u0003_D\u0011\"a=\u0001\u0005\u0004%\t!!7\t\u0011\u0005U\b\u0001)A\u0005\u00037D\u0011\"a>\u0001\u0005\u0004%\t!a\u001b\t\u0011\u0005e\b\u0001)A\u0005\u0003gA\u0011\"a?\u0001\u0005\u0004%\t!!\u001d\t\u0011\u0005u\b\u0001)A\u0005\u0003+B\u0011\"a@\u0001\u0005\u0004%\t!!\"\t\u0011\t\u0005\u0001\u0001)A\u0005\u0003\u000fCqAa\u0001\u0001\t\u0003\u0011)\u0001C\u0005\u0003\n\u0001\u0011\r\u0011\"
\u0001\u0002R\"A!1\u0002\u0001!\u0002\u0013\t\u0019\u000eC\u0005\u0003\u000e\u0001\u0011\r\u0011\"\u0001\u0002Z\"A!q\u0002\u0001!\u0002\u0013\tY\u000eC\u0005\u0003\u0012\u0001\u0011\r\u0011\"\u0001\u0002l!A!1\u0003\u0001!\u0002\u0013\t\u0019\u0004C\u0005\u0003\u0016\u0001\u0011\r\u0011\"\u0001\u0002r!A!q\u0003\u0001!\u0002\u0013\t)\u0006C\u0005\u0003\u001a\u0001\u0011\r\u0011\"\u0001\u0002\u0006\"A!1\u0004\u0001!\u0002\u0013\t9\tC\u0004\u0003\u001e\u0001!\tAa\b\t\u0013\t\r\u0002A1A\u0005\u0002\u0005e\u0007\u0002\u0003B\u0013\u0001\u0001\u0006I!a7\t\u0013\t\u001d\u0002A1A\u0005\u0002\u0005-\u0004\u0002\u0003B\u0015\u0001\u0001\u0006I!a\r\t\u0013\t-\u0002A1A\u0005\u0002\u0005E\u0004\u0002\u0003B\u0017\u0001\u0001\u0006I!!\u0016\t\u000f\t=\u0002\u0001\"\u0001\u00032!I!Q\u0007\u0001C\u0002\u0013\u0005\u00111\u000e\u0005\t\u0005o\u0001\u0001\u0015!\u0003\u00024!I!\u0011\b\u0001C\u0002\u0013\u0005\u0011\u0011\u000f\u0005\t\u0005w\u0001\u0001\u0015!\u0003\u0002V!I!Q\b\u0001C\u0002\u0013\u0005\u0011Q\u0011\u0005\t\u0005\u007f\u0001\u0001\u0015!\u0003\u0002\b\"9!\u0011\t\u0001\u0005\u0002\t\r\u0003\"\u0003B$\u0001\t\u0007I\u0011AAi\u0011!\u0011I\u0005\u0001Q\u0001\n\u0005M\u0007\"\u0003B&\u0001\t\u0007I\u0011AAm\u0011!\u0011i\u0005\u0001Q\u0001\n\u0005m\u0007\"\u0003B(\u0001\t\u0007I\u0011AA6\u0011!\u0011\t\u0006\u0001Q\u0001\n\u0005M\u0002\"\u0003B*\u0001\t\u0007I\u0011AA9\u0011!\u0011)\u0006\u0001Q\u0001\n\u0005U\u0003\"\u0003B,\u0001\t\u0007I\u0011AAC\u0011!\u0011I\u0006\u0001Q\u0001\n\u0005\u001d\u0005b\u0002B.\u0001\u0011\u0005!Q\f\u0005\n\u0005C\u0002!\u0019!C\u0001\u0003#D\u0001Ba\u0019\u0001A\u0003%\u00111\u001b\u0005\n\u0005K\u0002!\u0019!C\u0001\u00033D\u0001Ba\u001a\u0001A\u0003%\u00111\u001c\u0005\n\u0005S\u0002!\u0019!C\u0001\u0003WB\u0001Ba\u001b\u0001A\u0003%\u00111\u0007\u0005\n\u0005[\u0002!\u0019!C\u0001\u0003cB\u0001Ba\u001c\u0001A\u0003%\u0011Q\u000b\u0005\n\u0005c\u0002!\u0019!C\u0001\u0003\u000bC\u0001Ba\u001d\u0001A\u0003%\u0011q\u0011\u0005\b\u0005k\u
0002A\u0011\u0001B<\u0011\u001d\u0011Y\b\u0001C\u0001\u0005{B\u0011Ba\"\u0001\u0005\u0004%\t!!5\t\u0011\t%\u0005\u0001)A\u0005\u0003'D\u0011Ba#\u0001\u0005\u0004%\t!!7\t\u0011\t5\u0005\u0001)A\u0005\u00037D\u0011Ba$\u0001\u0005\u0004%\t!a\u001b\t\u0011\tE\u0005\u0001)A\u0005\u0003gA\u0011Ba%\u0001\u0005\u0004%\t!!\u001d\t\u0011\tU\u0005\u0001)A\u0005\u0003+B\u0011Ba&\u0001\u0005\u0004%\t!!\"\t\u0011\te\u0005\u0001)A\u0005\u0003\u000fCqAa'\u0001\t\u0003\u0011i\nC\u0004\u0003$\u0002!\tA!*\t\u0013\t%\u0006A1A\u0005\u0002\t-\u0006\u0002\u0003BY\u0001\u0001\u0006IA!,\t\u000f\tM\u0006\u0001\"\u0001\u00036\"9!\u0011\u0018\u0001\u0005\u0002\tm\u0006b\u0002B`\u0001\u0011\u0005!\u0011\u0019\u0002\u0005/>\u0014HM\u0003\u0002re\u0006QQ\r\u001f;sC\u000e$\u0018n\u001c8\u000b\u0005M$\u0018a\u00024fCR,(/\u001a\u0006\u0003kZ\f!C^1oI\u0006d\u0017n]7eKR,7\r^5p]*\u0011q\u000f_\u0001\t_V$H.[3sg*\u0011\u0011P_\u0001\u0007G>lWn\u001c8\u000b\u0005md\u0018AA7m\u0015\tih0A\u0006tC:\u001c\u0018mX:uC\u000e\\'\"A@\u0002\u00079,Go\u0001\u0001\u0014\u000b\u0001\t)!!\u0005\u0011\t\u0005\u001d\u0011QB\u0007\u0003\u0003\u0013Q!!a\u0003\u0002\u000bM\u001c\u0017\r\\1\n\t\u0005=\u0011\u0011\u0002\u0002\u0007\u0003:L(+\u001a4\u0011\t\u0005\u001d\u00111C\u0005\u0005\u0003+\tIA\u0001\u0007TKJL\u0017\r\\5{C\ndW-\u0001\u0004=S:LGO\u0010\u000b\u0003\u00037\u00012!!\b\u0001\u001b\u0005\u0001\u0018\u0001D<pe\u00124U-\u0019;ve\u0016\u001cH\u0003BA\u0012\u0003_\u0001b!a\u0002\u0002&\u0005%\u0012\u0002BA\u0014\u0003\u0013\u0011Q!\u0011:sCf\u0004B!a\u0002\u0002,%!\u0011QFA\u0005\u0005\u0019!u.\u001e2mK\"9\u0011\u0011\u0007\u0002A\u0002\u0005M\u0012\u0001C*ueZ\u000bG.^3\u0011\t\u0005U\u00121\t\b\u0005\u0003o\ty\u0004\u0005\u0003\u0002:\u0005%QBAA\u001e\u0015\u0011\ti$!\u0001\u0002\rq\u0012xn\u001c;?\u0013\u0011\t\t%!\u0003\u0002\rA\u0013X\rZ3g\u0013\u0011\t)%a\u0012\u0003\rM#(/\u001b8h\u0015\u0011\t\t%!\u0003\u0002\u0013]|'\u000f\u001a*bi&|GCBA\u0015\u0003\u001b\n\t\u0006C\u0004\u0002P\r\u0001\r!a\r\u0002\u0007M$(\
u000fC\u0004\u0002T\r\u0001\r!!\u0016\u0002\u000fA\fG\u000f^3s]B!\u0011qKA3\u001b\t\tIF\u0003\u0003\u0002\\\u0005u\u0013!\u0002:fO\u0016D(\u0002BA0\u0003C\nA!\u001e;jY*\u0011\u00111M\u0001\u0005U\u00064\u0018-\u0003\u0003\u0002h\u0005e#a\u0002)biR,'O\\\u0001\u0018e\u0016<W\r_0MC:<W/Y4f/>\u0014HMU1uS>,\"!a\r\u00021I,w-\u001a=`\u0019\u0006tw-^1hK^{'\u000f\u001a*bi&|\u0007%A\rqCR$XM\u001d8`\u0019\u0006tw-^1hK^{'\u000f\u001a*bi&|WCAA+\u0003i\u0001\u0018\r\u001e;fe:|F*\u00198hk\u0006<WmV8sIJ\u000bG/[8!\u0003ia\u0017M\\4vC\u001e,wk\u001c:e%\u0006$\u0018n\\\"iCJ\f7\r^3s)\u0011\tI#!\u001f\t\u000f\u0005=\u0003\u00021\u0001\u00024\u0005I\"/Z4fq~\u001buN\u001c;bS:d\u0015M\\4vC\u001e,wk\u001c:e\u0003i\u0011XmZ3y?\u000e{g\u000e^1j]2\u000bgnZ;bO\u0016<vN\u001d3!\u0003m\u0001\u0018\r\u001e;fe:|6i\u001c8uC&tG*\u00198hk\u0006<WmV8sI\u0006a\u0002/\u0019;uKJtwlQ8oi\u0006Lg\u000eT1oOV\fw-Z,pe\u0012\u0004\u0013aG7bi\u000eDWM]0D_:$\u0018-\u001b8MC:<W/Y4f/>\u0014H-\u0006\u0002\u0002\bB!\u0011qKAE\u0013\u0011\tY)!\u0017\u0003\u000f5\u000bGo\u00195fe\u0006aR.\u0019;dQ\u0016\u0014xlQ8oi\u0006Lg\u000eT1oOV\fw-Z,pe\u0012\u0004\u0013aE2p]R\f\u0017N\u001c'b]\u001e,\u0018mZ3X_J$G\u0003BAJ\u00033\u0003B!a\u0002\u0002\u0016&!\u0011qSA\u0005\u0005\u001d\u0011un\u001c7fC:Dq!a\u0014\u0010\u0001\u0004\t\u0019$\u0001\nvaB,'oY1tK^{'\u000f\u001a*bi&|G\u0003BA\u0015\u0003?Cq!a\u0014\u0011\u0001\u0004\t\u0019$\u0001\nm_^,'oY1tK^{'\u000f\u001a*bi&|G\u0003BA\u0015\u0003KCq!a\u0014\u0012\u0001\u0004\t\u0019$\u0001\fqCR$XM\u001d8`/>\u0014HmQ8oi\u0006Lg.\u0016*M\u0003]\u0001\u0018\r\u001e;fe:|vk\u001c:e\u0007>tG/Y5o+Jc\u0005%\u0001\fnCR\u001c\u0007.\u001a:`/>\u0014HmQ8oi\u0006Lg.\u0016*M\u0003]i\u0017\r^2iKJ|vk\u001c:e\u0007>tG/Y5o+Jc\u0005%\u0001\bd_:$\u0018-\u001b8V%2;vN\u001d3\u0015\t\u0005M\u00151\u0017\u0005\b\u0003\u001f2\u0002\u0019AA\u001a\u0003M\u0001\u0018\r\u001e;fe:|Fn\u001c8hKN$xk\u001c:e\u0003Q\u0001\u0018\r\u001e;fe:|Fn\u001c8hKN$xk\u001c:eA\u0005\u0019R.\u0019;dQ\u0016\u0014x\f\\8oO\u0016\u001cHo
V8sI\u0006!R.\u0019;dQ\u0016\u0014x\f\\8oO\u0016\u001cHoV8sI\u0002\n1\u0002\\8oO\u0016\u001cHoV8sIR!\u0011\u0011YAg!\u0011\t\u0019-!3\u000e\u0005\u0005\u0015'\u0002BAd\u0003C\nA\u0001\\1oO&!\u00111ZAc\u0005\u001dIe\u000e^3hKJDq!a\u0014\u001c\u0001\u0004\t\u0019$\u0001\nmk&\u001chk\u001c8BQ:<vN\u001d3mSN$XCAAj!\u0019\t9!!\n\u00024\u0005\u0019B.^5t->t\u0017\t\u001b8X_J$G.[:uA\u00051Ao\\6f]N,\"!a7\u0011\r\u0005u\u0017q\\A\u001a\u001b\t\ti&\u0003\u0003\u0002b\u0006u#\u0001\u0002'jgR\fq\u0001^8lK:\u001c\b%A\u0007qCR$XM\u001d8TiJLgnZ\u0001\u000fa\u0006$H/\u001a:o'R\u0014\u0018N\\4!\u0003=\u0001\u0018\r\u001e;fe:|&-\u00193X_J$\u0017\u0001\u00059biR,'O\\0cC\u0012<vN\u001d3!\u00031\u0011\u0017\rZ,pe\u0012\u0014\u0016\r^5p)\u0011\tI#!=\t\u000f\u0005=C\u00051\u0001\u00024\u0005)Bo\\6f]N|6m\u001c8uC&t'-\u00193x_J$\u0017A\u0006;pW\u0016t7oX2p]R\f\u0017N\u001c2bI^|'\u000f\u001a\u0011\u00029A\fG\u000f^3s]N#(/\u001b8h?\u000e|g\u000e^1j]\n\u000bGm^8sI\u0006i\u0002/\u0019;uKJt7\u000b\u001e:j]\u001e|6m\u001c8uC&t')\u00193x_J$\u0007%\u0001\fqCR$XM\u001d8`G>tG/Y5o\u0005\u0006$wo\u001c:e\u0003]\u0001\u0018\r\u001e;fe:|6m\u001c8uC&t')\u00193x_J$\u0007%\u0001\fnCR\u001c\u0007.\u001a:`\u0007>tG/Y5o\u0005\u0006$wk\u001c:e\u0003]i\u0017\r^2iKJ|6i\u001c8uC&t')\u00193X_J$\u0007%\u0001\bd_:$\u0018-\u001b8CC\u0012<vN\u001d3\u0015\t\u0005M%q\u0001\u0005\b\u0003\u001fj\u0003\u0019AA\u001a\u0003I\u0011\u0015M\u001c\"vS2$WM],pe\u0012d\u0017n\u001d;\u0002'\t\u000bgNQ;jY\u0012,'oV8sI2L7\u000f\u001e\u0011\u0002#Q|7.\u001a8t?\n\fgNQ;jY\u0012,'/\u0001\nu_.,gn]0cC:\u0014U/\u001b7eKJ\u0004\u0013\u0001\u00079biR,'O\\*ue&twm\u00182b]\n+\u0018\u000e\u001c3fe\u0006I\u0002/\u0019;uKJt7\u000b\u001e:j]\u001e|&-\u00198Ck&dG-\u001a:!\u0003I\u0001\u0018\r\u001e;fe:|&-\u00198Ck&dG-\u001a:\u0002'A\fG\u000f^3s]~\u0013\u0017M\u001c\"vS2$WM\u001d\u0011\u0002%5\fGo\u00195fe~\u0013\u0015M\u001c\"vS2$WM]\u0001\u0014[\u0006$8\r[3s?\n\u000bgNQ;jY\u0012,'\u000fI\u0001\u0017E\u0006t')^5mI\u0016\u0014xk\u001c:e\u0019&\u001cHoV8sI
R!\u00111\u0013B\u0011\u0011\u001d\ty\u0005\u000fa\u0001\u0003g\t!\u0002^8lK:\u001cxLY1o\u0003-!xn[3og~\u0013\u0017M\u001c\u0011\u0002#A\fG\u000f^3s]N#(/\u001b8h?\n\fg.\u0001\nqCR$XM\u001d8TiJLgnZ0cC:\u0004\u0013a\u00049biR,'O\\0cC:<vN\u001d3\u0002!A\fG\u000f^3s]~\u0013\u0017M\\,pe\u0012\u0004\u0013\u0001\u00042b]^{'\u000f\u001a*bi&|G\u0003BA\u0015\u0005gAq!a\u0014@\u0001\u0004\t\u0019$A\rsK\u001e,\u0007pX2p]R\f\u0017N\u001c'b]\u001e,\u0018mZ3X_J$\u0017A\u0007:fO\u0016DxlY8oi\u0006Lg\u000eT1oOV\fw-Z,pe\u0012\u0004\u0013A\b9biR,'O\\0g_J\u001cuN\u001c;bS:d\u0015M\\4vC\u001e,wk\u001c:e\u0003}\u0001\u0018\r\u001e;fe:|fm\u001c:D_:$\u0018-\u001b8MC:<W/Y4f/>\u0014H\rI\u0001\u001c[\u0006$8\r[3s?\u000e|g\u000e^1j]2\u000bgnZ;bO\u0016<vN\u001d3\u000295\fGo\u00195fe~\u001bwN\u001c;bS:d\u0015M\\4vC\u001e,wk\u001c:eA\u0005Q2m\u001c8uC&tG*\u00198hk\u0006<WMQ1e/>\u0014HmV8sIR!\u00111\u0013B#\u0011\u001d\tyE\u0012a\u0001\u0003g\t\u0011\"T1mK:\u000bW.Z:\u0002\u00155\u000bG.\u001a(b[\u0016\u001c\b%A\bu_.,gn]0nC2,g*Y7f\u0003A!xn[3og~k\u0017\r\\3OC6,\u0007%\u0001\fqCR$XM\u001d8TiJLgnZ0NC2,g*Y7f\u0003]\u0001\u0018\r\u001e;fe:\u001cFO]5oO~k\u0015\r\\3OC6,\u0007%\u0001\tqCR$XM\u001d8`\u001b\u0006dWMT1nK\u0006\t\u0002/\u0019;uKJtw,T1mK:\u000bW.\u001a\u0011\u0002!5\fGo\u00195fe~k\u0015\r\\3OC6,\u0017!E7bi\u000eDWM]0NC2,g*Y7fA\u0005aQ.\u00197f\u001d\u0006lWmV8sIR!\u00111\u0013B0\u0011\u001d\ty%\u0015a\u0001\u0003g\t1BR3nC2,g*Y7fg\u0006aa)Z7bY\u0016t\u0015-\\3tA\u0005\tBo\\6f]N|f)Z7bY\u0016t\u0015-\\3\u0002%Q|7.\u001a8t?\u001a+W.\u00197f\u001d\u0006lW\rI\u0001\u0019a\u0006$H/\u001a:o'R\u0014\u0018N\\4`\r\u0016l\u0017\r\\3OC6,\u0017!\u00079biR,'O\\*ue&twm\u0018$f[\u0006dWMT1nK\u0002\n!\u0003]1ui\u0016\u0014hn\u0018$f\u001b\u0006dWMT1nK\u0006\u0019\u0002/\u0019;uKJtwLR3NC2,g*Y7fA\u0005\u0011R.\u0019;dQ\u0016\u0014xLR3NC2,g*Y7f\u0003Mi\u0017\r^2iKJ|f)Z'bY\u0016t\u0015-\\3!\u000391W-\\1mK:\u000bW.Z,pe\u0012$B!a%\u0003z!9\u0011q\n/A\u0002\u0005M\u0012\u0001L2veJ,g\u000e\u001e)sKZLw.^:D_6lWM\u001c;U
S\u0006dg*^7cKJ\u001c\u0006.\u0019:j]\u001e<vN\u001d3t)\u0019\t\tMa \u0003\u0004\"9!\u0011Q/A\u0002\u0005M\u0012aC:ue~\u001bWO\u001d:f]RDqA!\"^\u0001\u0004\t\u0019$\u0001\u0005TiJ|\u0006K]3w\u0003!\u0019Fo\u001c9X_J$\u0017!C*u_B<vN\u001d3!\u0003A!xn[3og~\u001bFo\u001c9X_J$7/A\tu_.,gn]0Ti>\u0004xk\u001c:eg\u0002\na\u0003]1ui\u0016\u0014hn\u0015;sS:<wl\u001d;pa^|'\u000fZ\u0001\u0018a\u0006$H/\u001a:o'R\u0014\u0018N\\4`gR|\u0007o^8sI\u0002\n\u0011\u0003]1ui\u0016\u0014hnX:u_B<xN\u001d3t\u0003I\u0001\u0018\r\u001e;fe:|6\u000f^8qo>\u0014Hm\u001d\u0011\u0002!5\fGo\u00195fe~\u001bHo\u001c9x_J$\u0017!E7bi\u000eDWM]0ti>\u0004xo\u001c:eA\u0005a4-\u001e:sK:$\bK]3wS>,8oQ8n[\u0016tG\u000fV5bY:+XNY3s'\"\f'/\u001b8h/>\u0014Hm],ji\"|W\u000f^*u_B<vN\u001d3t)\u0019\t\tMa(\u0003\"\"9!\u0011\u00115A\u0002\u0005M\u0002b\u0002BCQ\u0002\u0007\u00111G\u0001\u0011O\u0016$h*^7cKJ|e\rT5oWN$B!!\u000b\u0003(\"9\u0011qJ5A\u0002\u0005M\u0012\u0001\u0003*fO\u0016D8\u000b\u001e:\u0016\u0005\t5\u0006\u0003BAb\u0005_KA!!\u0012\u0002F\u0006I!+Z4fqN#(\u000fI\u0001\u0018O\u0016$h*^7cKJ|e\rT1oOV\fw-Z,pe\u0012$B!!\u000b\u00038\"9\u0011q\n7A\u0002\u0005M\u0012AD4fi:+XNY3s\u001f\u001a\f\u0016\n\u001a\u000b\u0005\u0003S\u0011i\fC\u0004\u0002P5\u0004\r!a\r\u0002\u0015A\u0014x\u000e]8si&|g\u000e\u0006\u0004\u0003D\n%'Q\u001a\t\u0005\u0003\u000f\u0011)-\u0003\u0003\u0003H\u0006%!!\u0002$m_\u0006$\bb\u0002Bf]\u0002\u0007\u0011\u0011F\u0001\t_2$7i\\;oi\"9!q\u001a8A\u0002\u0005%\u0012\u0001\u00038fo\u000e{WO\u001c;")
/* loaded from: input_file:net/sansa_stack/ml/common/outliers/vandalismdetection/feature/extraction/Word.class */
public class Word implements Serializable {
    private final String regex_LanguageWordRatio = "(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)\n    |rmenian?|ssamese|azeri|z[e\\\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\\\u043d\\\\u0433\\\\u043b\\\\u0438\\\\u0439\\\\u0441\n    \\\\u043a\\\\u0438\\\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?\n    |okm\\\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech\n    |roat([eo]|ian?)|atal[a\\\\u00e0]n?|\\\\u0440\\\\u043f\\\\u0441\\\\u043a\\\\u0438|antonese)|[c\\\\u010d](esky|e[s\n    \\\\u0161]tina)\\r\\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\\\u00f1]\n    h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)\n    |g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))\n    |i(celandic|ndian?|ndonesian?|ngl[e\\\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)\n    |k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\\\u0161]u))|ietuvi[u\\\\u0173]\n    |ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro\n    |ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\\\u00e5]l)?|ynorsk)|o(landese|dia)\n    |p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\\\u00ea]se?(( d[eo])? 
brasil(eiro)?| ?\\\\(brasil\\\\))?|unjabi)\n    |r(om[a\\\\u00e2i]ni?[a\\\\u0103]n?|um(ano|\\\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english\n    |inha?la|lov(ak(ian?)?|en\\\\u0161?[c\\\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski\n    |venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\\\u1ebf]ng vi[e\\\\u1ec7]t|[u\\\\u00fc]rk([c\\\\u00e7]e\n    |isc?h|i\\\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\\\u0430\\\\u043d\\\\u0433\\\\u043b\\\\u0438\n    \\\\u0438\\\\u0441|[k\\\\u043a]\\\\u0430\\\\u043b\\\\u043c\\\\u044b\\\\u043a\\\\u0441|[k\\\\u043a]\\\\u0430\\\\u0437\\\\u0430\n    \\\\u0445\\\\u0441|\\\\u043d\\\\u0435\\\\u043c\\\\u0435\\\\u0446|[p\\\\u0440]\\\\u0443\\\\u0441\\\\u0441|[y\\\\u0443]\\\\u0437\n    \\\\u0431\\\\u0435\\\\u043a\\\\u0441)\\\\u043a\\\\u0438\\\\u0439( \\\\u044f\\\\u0437\\\\u044b\\\\u043a)??|\\\\u05e2\\\\u05d1\n    \\\\u05e8\\\\u05d9\\\\u05ea|[k\\\\u043a\\\\u049b](\\\\u0430\\\\u0437\\\\u0430[\\\\u043a\\\\u049b]\\\\u0448\\\\u0430|\\\\u044b\n    \\\\u0440\\\\u0433\\\\u044b\\\\u0437\\\\u0447\\\\u0430|\\\\u0438\\\\u0440\\\\u0438\\\\u043b\\\\u043b)|\\\\u0443\\\\u043a\n    \\\\u0440\\\\u0430\\\\u0457\\\\u043d\\\\u0441\\\\u044c\\\\u043a(\\\\u0430|\\\\u043e\\\\u044e)|\\\\u0431(\\\\u0435\\\\u043b\n    \\\\u0430\\\\u0440\\\\u0443\\\\u0441\\\\u043a\\\\u0430\\\\u044f|\\\\u044a\\\\u043b\\\\u0433\\\\u0430\\\\u0440\\\\u0441\\\\u043a\n    \\\\u0438( \\\\u0435\\\\u0437\\\\u0438\\\\u043a)?)|\\\\u03b5\\\\u03bb\\\\u03bb[\\\\u03b7\\\\u03b9]\\\\u03bd\\\\u03b9\\\\u03ba\n    (\\\\u03ac|\\\\u03b1)|\\\\u10e5\\\\u10d0\\\\u10e0\\\\u10d7\\\\u10e3\\\\u10da\\\\u10d8|\\\\u0939\\\\u093f\\\\u0928\\\\u094d\n    \\\\u0926\\\\u0940|\\\\u0e44\\\\u0e17\\\\u0e22|[m\\\\u043c]\\\\u043e\\\\u043d\\\\u0433\\\\u043e\\\\u043b(\\\\u0438\\\\u0430)?\n    |([c\\\\u0441]\\\\u0440\\\\u043f|[m\\\\u043c]\\\\u0430\\\\u043a\\\\u0435\\\\u0434\\\\u043e\\\\u043d)\\\\u0441\\\\u043a\\\\u0438\n    
|\\\\u0627\\\\u0644\\\\u0639\\\\u0631\\\\u0628\\\\u064a\\\\u0629|\\\\u65e5\\\\u672c\\\\u8a9e|\\\\ud55c\\\\uad6d(\\\\ub9d0|\\\\uc5b4)\n    |\\\\u200c\\\\u0939\\\\u093f\\\\u0928\\\\u0926\\\\u093c\\\\u093f|\\\\u09ac\\\\u09be\\\\u0982\\\\u09b2\\\\u09be|\\\\u0a2a\\\\u0a70\n    \\\\u0a1c\\\\u0a3e\\\\u0a2c\\\\u0a40|\\\\u092e\\\\u0930\\\\u093e\\\\u0920\\\\u0940|\\\\u0c95\\\\u0ca8\\\\u0ccd\\\\u0ca8\\\\u0ca1|\n    \\\\u0627\\\\u064f\\\\u0631\\\\u062f\\\\u064f\\\\u0648|\\\\u0ba4\\\\u0bae\\\\u0bbf\\\\u0bb4\\\\u0bcd|\\\\u0c24\\\\u0c46\\\\u0c32\n    \\\\u0c41\\\\u0c17\\\\u0c41|\\\\u0a97\\\\u0ac1\\\\u0a9c\\\\u0ab0\\\\u0abe\\\\u0aa4\\\\u0ac0|\\\\u0641\\\\u0627\\\\u0631\\\\u0633\n    \\\\u06cc|\\\\u067e\\\\u0627\\\\u0631\\\\u0633\\\\u06cc|\\\\u0d2e\\\\u0d32\\\\u0d2f\\\\u0d3e\\\\u0d33\\\\u0d02|\\\\u067e\\\\u069a\n    \\\\u062a\\\\u0648|\\\\u1019\\\\u103c\\\\u1014\\\\u103a\\\\u1019\\\\u102c\\\\u1018\\\\u102c\\\\u101e\\\\u102c|\\\\u4e2d\\\\u6587\n    (\\\\u7b80\\\\u4f53|\\\\u7e41\\\\u9ad4)?|\\\\u4e2d\\\\u6587\\\\uff08(\\\\u7b80\\\\u4f53?|\\\\u7e41\\\\u9ad4)\\\\uff09|\\\\u7b80\n    \\\\u4f53|\\\\u7e41\\\\u9ad4)";
    /**
     * Compiled form of the language-name expression returned by
     * {@code regex_LanguageWordRatio()}, compiled without any match flags.
     *
     * <p>NOTE(review): the literal behind that accessor appears to carry doubled
     * backslashes in front of its {@code uXXXX} sequences and embedded
     * newline-plus-indent runs; since the COMMENTS flag is not set, those would be
     * matched literally. Confirm this is the intended pattern.
     */
    private final Pattern pattern_LanguageWordRatio =
            Pattern.compile(regex_LanguageWordRatio());
    private final String regex_ContainLanguageWord = "(^|\\\\n)([ei]n )??(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?\n    |[ei]c|ian?|isc?h)|rmenian?|ssamese|azeri|z[e\\\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\\\u043d\\\\u0433\\\\u043b\\\\u0438\n    \\\\u0439\\\\u0441\\\\u043a\\\\u0438\\\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]\n    ?li|elarusian?|okm\\\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese(\n    simplified)?|zech|roat([eo]|ian?)|atal[a\\\\u00e0]n?|\\\\u0440\\\\u043f\\\\u0441\\\\u043a\\\\u0438|antonese)|[c\\\\u010d]\n    (esky|e[s\\\\u0161]tina)\\r\\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\n    \\\\u00f1]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\\\u00e7](ais|e|ez[ao])|ren[cs]h\n    |arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski\n    |ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[e\\\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?\n    |vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\\\u0161]u))|ietuvi[u\\\\u0173]\n    |ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)\n    |yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?\n    |ol(n?isc?h|ski)|or?tugu?[e\\\\u00ea]se?(( d[eo])? 
brasil(eiro)?| ?\\\\(brasil\\\\))?|unjabi)|r(om[a\\\\u00e2i]ni?\n    [a\\\\u0103]n?|um(ano|\\\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?\n    |en\\\\u0161?[c\\\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)\n    |t(a(galog|mil)|elugu|hai(land)?|i[e\\\\u1ebf]ng vi[e\\\\u1ec7]t|[u\\\\u00fc]rk([c\\\\u00e7]e|isc?h|i\\\\u015f|ey))\n    |u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\\\u0430\\\\u043d\\\\u0433\\\\u043b\\\\u0438\\\\u0438\\\\u0441|[k\\\\u043a]\n    \\\\u0430\\\\u043b\\\\u043c\\\\u044b\\\\u043a\\\\u0441|[k\\\\u043a]\\\\u0430\\\\u0437\\\\u0430\\\\u0445\\\\u0441|\\\\u043d\\\\u0435\\\\u043c\n    \\\\u0435\\\\u0446|[p\\\\u0440]\\\\u0443\\\\u0441\\\\u0441|[y\\\\u0443]\\\\u0437\\\\u0431\\\\u0435\\\\u043a\\\\u0441)\\\\u043a\\\\u0438\n    \\\\u0439( \\\\u044f\\\\u0437\\\\u044b\\\\u043a)??|\\\\u05e2\\\\u05d1\\\\u05e8\\\\u05d9\\\\u05ea|[k\\\\u043a\\\\u049b](\\\\u0430\\\\u0437\n    \\\\u0430[\\\\u043a\\\\u049b]\\\\u0448\\\\u0430|\\\\u044b\\\\u0440\\\\u0433\\\\u044b\\\\u0437\\\\u0447\\\\u0430|\\\\u0438\\\\u0440\\\\u0438\n    \\\\u043b\\\\u043b)|\\\\u0443\\\\u043a\\\\u0440\\\\u0430\\\\u0457\\\\u043d\\\\u0441\\\\u044c\\\\u043a(\\\\u0430|\\\\u043e\\\\u044e)|\n    \\\\u0431(\\\\u0435\\\\u043b\\\\u0430\\\\u0440\\\\u0443\\\\u0441\\\\u043a\\\\u0430\\\\u044f|\\\\u044a\\\\u043b\\\\u0433\\\\u0430\\\\u0440\n    \\\\u0441\\\\u043a\\\\u0438( \\\\u0435\\\\u0437\\\\u0438\\\\u043a)?)|\\\\u03b5\\\\u03bb\\\\u03bb[\\\\u03b7\\\\u03b9]\\\\u03bd\\\\u03b9\n    \\\\u03ba(\\\\u03ac|\\\\u03b1)|\\\\u10e5\\\\u10d0\\\\u10e0\\\\u10d7\\\\u10e3\\\\u10da\\\\u10d8|\\\\u0939\\\\u093f\\\\u0928\\\\u094d\n    \\\\u0926\\\\u0940|\\\\u0e44\\\\u0e17\\\\u0e22|[m\\\\u043c]\\\\u043e\\\\u043d\\\\u0433\\\\u043e\\\\u043b(\\\\u0438\\\\u0430)?|([c\n    \\\\u0441]\\\\u0440\\\\u043f|[m\\\\u043c]\\\\u0430\\\\u043a\\\\u0435\\\\u0434\\\\u043e\\\\u043d)\\\\u0441\\\\u043a\\\\u0438|\\\\u0627\n    
\\\\u0644\\\\u0639\\\\u0631\\\\u0628\\\\u064a\\\\u0629|\\\\u65e5\\\\u672c\\\\u8a9e|\\\\ud55c\\\\uad6d(\\\\ub9d0|\\\\uc5b4)|\\\\u200c\n    \\\\u0939\\\\u093f\\\\u0928\\\\u0926\\\\u093c\\\\u093f|\\\\u09ac\\\\u09be\\\\u0982\\\\u09b2\\\\u09be|\\\\u0a2a\\\\u0a70\\\\u0a1c\\\\u0a3e\n    \\\\u0a2c\\\\u0a40|\\\\u092e\\\\u0930\\\\u093e\\\\u0920\\\\u0940|\\\\u0c95\\\\u0ca8\\\\u0ccd\\\\u0ca8\\\\u0ca1|\\\\u0627\\\\u064f\\\\u0631\n    \\\\u062f\\\\u064f\\\\u0648|\\\\u0ba4\\\\u0bae\\\\u0bbf\\\\u0bb4\\\\u0bcd|\\\\u0c24\\\\u0c46\\\\u0c32\\\\u0c41\\\\u0c17\\\\u0c41|\\\\u0a97\n    \\\\u0ac1\\\\u0a9c\\\\u0ab0\\\\u0abe\\\\u0aa4\\\\u0ac0|\\\\u0641\\\\u0627\\\\u0631\\\\u0633\\\\u06cc|\\\\u067e\\\\u0627\\\\u0631\\\\u0633\n    \\\\u06cc|\\\\u0d2e\\\\u0d32\\\\u0d2f\\\\u0d3e\\\\u0d33\\\\u0d02|\\\\u067e\\\\u069a\\\\u062a\\\\u0648|\\\\u1019\\\\u103c\\\\u1014\\\\u103a\n    \\\\u1019\\\\u102c\\\\u1018\\\\u102c\\\\u101e\\\\u102c|\\\\u4e2d\\\\u6587(\\\\u7b80\\\\u4f53|\\\\u7e41\\\\u9ad4)?|\\\\u4e2d\\\\u6587\n    \\\\uff08(\\\\u7b80\\\\u4f53?|\\\\u7e41\\\\u9ad4)\\\\uff09|\\\\u7b80\\\\u4f53|\\\\u7e41\\\\u9ad4)( language)??($|\\\\n)";
    /**
     * Pattern compiled, without match flags, from the string returned by
     * {@code regex_ContainLanguageWord()}.
     */
    private final Pattern pattern_ContainLanguageWord =
            Pattern.compile(regex_ContainLanguageWord());

    /**
     * Reusable matcher created from {@code pattern_ContainLanguageWord()}, initially
     * positioned over the empty string.
     *
     * <p>NOTE(review): {@link Matcher} is neither thread-safe nor {@code Serializable},
     * while the enclosing class is declared {@code Serializable}; verify that instances
     * are never Java-serialized or shared across threads with this field populated.
     */
    private final Matcher matcher_ContainLanguageWord =
            pattern_ContainLanguageWord().matcher("");
    /**
     * Pattern for text that contains, at a word boundary, an {@code http://},
     * {@code https://} or {@code www.} prefix followed by ten non-whitespace characters
     * and then any remaining text.
     *
     * <p>The flag expression below evaluates to 226: case-insensitive matching, Unicode
     * case folding, dot-matches-all and canonical equivalence.
     */
    private final Pattern pattern_WordContainURL = Pattern.compile(
            "\\b(https?:\\/\\/|www\\.)\\S{10}.*",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.UNICODE_CASE | Pattern.CANON_EQ);

    /**
     * Reusable matcher created from {@code pattern_WordContainURL()}, initially
     * positioned over the empty string.
     */
    private final Matcher matcher_WordContainURL =
            pattern_WordContainURL().matcher("");
    /**
     * Pattern matching one or more consecutive alphabetic characters
     * ({@code IsAlphabetic} Unicode property).
     *
     * <p>The flag expression below evaluates to 226: case-insensitive matching, Unicode
     * case folding, dot-matches-all and canonical equivalence.
     */
    private final Pattern pattern_longestWord = Pattern.compile(
            "\\p{IsAlphabetic}+",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.UNICODE_CASE | Pattern.CANON_EQ);

    /**
     * Reusable matcher for {@link #pattern_longestWord}, initially positioned over the
     * empty string.
     *
     * <p>Previously this matcher was created from the URL pattern, so any caller that
     * used it to scan for alphabetic runs was actually matching URLs. It is now bound
     * to the longest-word pattern declared directly above; the field is read directly
     * because it is guaranteed to be initialized at this point.
     */
    private final Matcher matcher_longestWord =
            this.pattern_longestWord.matcher("");
    private final String[] luisVonAhnWordlist = {"abbo", "abo", "abortion", "abuse", "addict", "addicts", "adult", "africa", "african", "alla", "allah", "alligatorbait", "amateur", "american", "anal", "analannie", "analsex", "angie", "angry", "anus", "arab", "arabs", "areola", "argie", "aroused", "arse", "arsehole", "asian", "ass", "assassin", "assassinate", "assassination", "assault", "assbagger", "assblaster", "assclown", "asscowboy", "asses", "assfuck", "assfucker", "asshat", "asshole", "assholes", "asshore", "assjockey", "asskiss", "asskisser", "assklown", "asslick", "asslicker", "asslover", "assman", "assmonkey", "assmunch", "assmuncher", "asspacker", "asspirate", "asspuppies", "assranger", "asswhore", "asswipe", "athletesfoot", "attack", "australian", "babe", "babies", "backdoor", "backdoorman", "backseat", "badfuck", "balllicker", "balls", "ballsack", "banging", "baptist", "barelylegal", "barf", "barface", "barfface", "bast", "bastard ", "bazongas", "bazooms", "beaner", "beast", "beastality", "beastial", "beastiality", "beatoff", "beat-off", "beatyourmeat", "beaver", "bestial", "bestiality", "bi", "biatch", "bible", "bicurious", "bigass", "bigbastard", "bigbutt", "bigger", "bisexual", "bi-sexual", "bitch", "bitcher", "bitches", "bitchez", "bitchin", "bitching", "bitchslap", "bitchy", "biteme", "black", "blackman", "blackout", "blacks", "blind", "blow", "blowjob", "boang", "bogan", "bohunk", "bollick", "bollock", "bomb", "bombers", "bombing", "bombs", "bomd", "bondage", "boner", "bong", "boob", "boobies", "boobs", "booby", "boody", "boom", "boong", "boonga", "boonie", "booty", "bootycall", "bountybar", "bra", "brea5t", "breast", "breastjob", "breastlover", "breastman", "brothel", "bugger", "buggered", "buggery", "bullcrap", "bulldike", "bulldyke", "bullshit", "bumblefuck", "bumfuck", "bunga", "bunghole", "buried", "burn", "butchbabes", "butchdike", "butchdyke", "butt", "buttbang", "butt-bang", "buttface", "buttfuck", "butt-fuck", "buttfucker", "butt-fucker", 
"buttfuckers", "butt-fuckers", "butthead", "buttman", "buttmunch", "buttmuncher", "buttpirate", "buttplug", "buttstain", "byatch", "cacker", "cameljockey", "cameltoe", "canadian", "cancer", "carpetmuncher", "carruth", "catholic", "catholics", "cemetery", "chav", "cherrypopper", "chickslick", "children's", "chin", "chinaman", "chinamen", "chinese", "chink", "chinky", "choad", "chode", "christ", "christian", "church", "cigarette", "cigs", "clamdigger", "clamdiver", "clit", "clitoris", "clogwog", "cocaine", "cock", "cockblock", "cockblocker", "cockcowboy", "cockfight", "cockhead", "cockknob", "cocklicker", "cocklover", "cocknob", "cockqueen", "cockrider", "cocksman", "cocksmith", "cocksmoker", "cocksucer", "cocksuck ", "cocksucked ", "cocksucker", "cocksucking", "cocktail", "cocktease", "cocky", "cohee", "coitus", "color", "colored", "coloured", "commie", "communist", "condom", "conservative", "conspiracy", "coolie", "cooly", "coon", "coondog", "copulate", "cornhole", "corruption", "cra5h", "crabs", "crack", "crackpipe", "crackwhore", "crack-whore", "crap", "crapola", "crapper", "crappy", "crash", "creamy", "crime", "crimes", "criminal", "criminals", "crotch", "crotchjockey", "crotchmonkey", "crotchrot", "cum", "cumbubble", "cumfest", "cumjockey", "cumm", "cummer", "cumming", "cumquat", "cumqueen", "cumshot", "cunilingus", "cunillingus", "cunn", "cunnilingus", "cunntt", "cunt", "cunteyed", "cuntfuck", "cuntfucker", "cuntlick ", "cuntlicker ", "cuntlicking ", "cuntsucker", "cybersex", "cyberslimer", "dago", "dahmer", "dammit", "damn", "damnation", "damnit", "darkie", "darky", "datnigga", "dead", "deapthroat", "death", "deepthroat", "defecate", "dego", "demon", "deposit", "desire", "destroy", "deth", "devil", "devilworshipper", "dick", "dickbrain", "dickforbrains", "dickhead", "dickless", "dicklick", "dicklicker", "dickman", "dickwad", "dickweed", "diddle", "die", "died", "dies", "dike", "dildo", "dingleberry", "dink", "dipshit", "dipstick", "dirty", "disease", 
"diseases", "disturbed", "dive", "dix", "dixiedike", "dixiedyke", "doggiestyle", "doggystyle", "dong", "doodoo", "doo-doo", "doom", "dope", "dragqueen", "dragqween", "dripdick", "drug", "drunk", "drunken", "dumb", "dumbass", "dumbbitch", "dumbfuck", "dyefly", "dyke", "easyslut", "eatballs", "eatme", "eatpussy", "ecstacy", "ejaculate", "ejaculated", "ejaculating ", "ejaculation", "enema", "enemy", "erect", "erection", "ero", "escort", "ethiopian", "ethnic", "european", "evl", "excrement", "execute", "executed", "execution", "executioner", "explosion", "facefucker", "faeces", "fag", "fagging", "faggot", "fagot", "failed", "failure", "fairies", "fairy", "faith", "fannyfucker", "fart", "farted ", "farting ", "farty ", "fastfuck", "fat", "fatah", "fatass", "fatfuck", "fatfucker", "fatso", "fckcum", "fear", "feces", "felatio ", "felch", "felcher", "felching", "fellatio", "feltch", "feltcher", "feltching", "fetish", "fight", "filipina", "filipino", "fingerfood", "fingerfuck ", "fingerfucked ", "fingerfucker ", "fingerfuckers", "fingerfucking ", "fire", "firing", "fister", "fistfuck", "fistfucked ", "fistfucker ", "fistfucking ", "fisting", "flange", "flasher", "flatulence", "floo", "flydie", "flydye", "fok", "fondle", "footaction", "footfuck", "footfucker", "footlicker", "footstar", "fore", "foreskin", "forni", "fornicate", "foursome", "fourtwenty", "fraud", "freakfuck", "freakyfucker", "freefuck", "fu", "fubar", "fuc", "fucck", "fuck", "fucka", "fuckable", "fuckbag", "fuckbuddy", "fucked", "fuckedup", "fucker", "fuckers", "fuckface", "fuckfest", "fuckfreak", "fuckfriend", "fuckhead", "fuckher", "fuckin", "fuckina", "fucking", "fuckingbitch", "fuckinnuts", "fuckinright", "fuckit", "fuckknob", "fuckme ", "fuckmehard", "fuckmonkey", "fuckoff", "fuckpig", "fucks", "fucktard", "fuckwhore", "fuckyou", "fudgepacker", "fugly", "fuk", "fuks", "funeral", "funfuck", "fungus", "fuuck", "gangbang", "gangbanged ", "gangbanger", "gangsta", "gatorbait", "gay", "gaymuthafuckinwhore", 
"gaysex ", "geez", "geezer", "geni", "genital", "german", "getiton", "gin", "ginzo", "gipp", "girls", "givehead", "glazeddonut", "gob", "god", "godammit", "goddamit", "goddammit", "goddamn", "goddamned", "goddamnes", "goddamnit", "goddamnmuthafucker", "goldenshower", "gonorrehea", "gonzagas", "gook", "gotohell", "goy", "goyim", "greaseball", "gringo", "groe", "gross", "grostulation", "gubba", "gummer", "gun", "gyp", "gypo", "gypp", "gyppie", "gyppo", "gyppy", "hamas", "handjob", "hapa", "harder", "hardon", "harem", "headfuck", "headlights", "hebe", "heeb", "hell", "henhouse", "heroin", "herpes", "heterosexual", "hijack", "hijacker", "hijacking", "hillbillies", "hindoo", "hiscock", "hitler", "hitlerism", "hitlerist", "hiv", "ho", "hobo", "hodgie", "hoes", "hole", "holestuffer", "homicide", "homo", "homobangers", "homosexual", "honger", "honk", "honkers", "honkey", "honky", "hook", "hooker", "hookers", "hooters", "hore", "hork", "horn", "horney", "horniest", "horny", "horseshit", "hosejob", "hoser", "hostage", "hotdamn", "hotpussy", "hottotrot", "hummer", "husky", "hussy", "hustler", "hymen", "hymie", "iblowu", "idiot", "ikey", "illegal", "incest", "insest", "intercourse", "interracial", "intheass", "inthebuff", "israel", "israeli", "israel's", "italiano", "itch", "jackass", "jackoff", "jackshit", "jacktheripper", "jade", "jap", "japanese", "japcrap", "jebus", "jeez", "jerkoff", "jesus", "jesuschrist", "jew", "jewish", "jiga", "jigaboo", "jigg", "jigga", "jiggabo", "jigger ", "jiggy", "jihad", "jijjiboo", "jimfish", "jism", "jiz ", "jizim", "jizjuice", "jizm ", "jizz", "jizzim", "jizzum", "joint", "juggalo", "jugs", "junglebunny", "kaffer", "kaffir", "kaffre", "kafir", "kanake", "kid", "kigger", "kike", "kill", "killed", "killer", "killing", "kills", "kink", "kinky", "kissass", "kkk", "knife", "knockers", "kock", "kondum", "koon", "kotex", "krap", "krappy", "kraut", "kum", "kumbubble", "kumbullbe", "kummer", "kumming", "kumquat", "kums", "kunilingus", "kunnilingus", 
"kunt", "ky", "kyke", "lactate", "laid", "lapdance", "latin", "lesbain", "lesbayn", "lesbian", "lesbin", "lesbo", "lez", "lezbe", "lezbefriends", "lezbo", "lezz", "lezzo", "liberal", "libido", "licker", "lickme", "lies", "limey", "limpdick", "limy", "lingerie", "liquor", "livesex", "loadedgun", "lolita", "looser", "loser", "lotion", "lovebone", "lovegoo", "lovegun", "lovejuice", "lovemuscle", "lovepistol", "loverocket", "lowlife", "lsd", "lubejob", "lucifer", "luckycammeltoe", "lugan", "lynch", "macaca", "mad", "mafia", "magicwand", "mams", "manhater", "manpaste", "marijuana", "mastabate", "mastabater", "masterbate", "masterblaster", "mastrabator", "masturbate", "masturbating", "mattressprincess", "meatbeatter", "meatrack", "meth", "mexican", "mgger", "mggor", "mickeyfinn", "mideast", "milf", "minority", "mockey", "mockie", "mocky", "mofo", "moky", "moles", "molest", "molestation", "molester", "molestor", "moneyshot", "mooncricket", "mormon", "moron", "moslem", "mosshead", "mothafuck", "mothafucka", "mothafuckaz", "mothafucked ", "mothafucker", "mothafuckin", "mothafucking ", "mothafuckings", "motherfuck", "motherfucked", "motherfucker", "motherfuckin", "motherfucking", "motherfuckings", "motherlovebone", "muff", "muffdive", "muffdiver", "muffindiver", "mufflikcer", "mulatto", "muncher", "munt", "murder", "murderer", "muslim", "naked", "narcotic", "nasty", "nastybitch", "nastyho", "nastyslut", "nastywhore", "nazi", "necro", "negro", "negroes", "negroid", "negro's", "nig", "niger", "nigerian", "nigerians", "nigg", "nigga", "niggah", "niggaracci", "niggard", "niggarded", "niggarding", "niggardliness", "niggardliness's", "niggardly", "niggards", "niggard's", "niggaz", "nigger", "niggerhead", "niggerhole", "niggers", "nigger's", "niggle", "niggled", "niggles", "niggling", "nigglings", "niggor", "niggur", "niglet", "nignog", "nigr", "nigra", "nigre", "nip", "nipple", "nipplering", "nittit", "nlgger", "nlggor", "nofuckingway", "nook", "nookey", "nookie", "noonan", 
"nooner", "nude", "nudger", "nuke", "nutfucker", "nymph", "ontherag", "oral", "orga", "orgasim ", "orgasm", "orgies", "orgy", "osama", "paki", "palesimian", "palestinian", "pansies", "pansy", "panti", "panties", "payo", "pearlnecklace", "peck", "pecker", "peckerwood", "pee", "peehole", "pee-pee", "peepshow", "peepshpw", "pendy", "penetration", "peni5", "penile", "penis", "penises", "penthouse", "period", "perv", "phonesex", "phuk", "phuked", "phuking", "phukked", "phukking", "phungky", "phuq", "pi55", "picaninny", "piccaninny", "pickaninny", "piker", "pikey", "piky", "pimp", "pimped", "pimper", "pimpjuic", "pimpjuice", "pimpsimp", "pindick", "piss", "pissed", "pisser", "pisses ", "pisshead", "pissin ", "pissing", "pissoff ", "pistol", "pixie", "pixy", "playboy", "playgirl", "pocha", "pocho", "pocketpool", "pohm", "polack", "pom", "pommie", "pommy", "poo", "poon", "poontang", "poop", "pooper", "pooperscooper", "pooping", "poorwhitetrash", "popimp", "porchmonkey", "porn", "pornflick", "pornking", "porno", "pornography", "pornprincess", "pot", "poverty", "premature", "pric", "prick", "prickhead", "primetime", "propaganda", "pros", 
    "prostitute", "protestant", "pu55i", "pu55y", "pube", "pubic", "pubiclice", "pud", "pudboy", "pudd", "puddboy", "puke", "puntang", "purinapricness", "puss", "pussie", "pussies", "pussy", "pussycat", "pussyeater", "pussyfucker", "pussylicker", "pussylips", "pussylover", "pussypounder", "pusy", "quashie", "queef", "queer", "quickie", "quim", "ra8s", "rabbi", "racial", "racist", "radical", "radicals", "raghead", "randy", "rape", "raped", "raper", "rapist", "rearend", "rearentry", "rectum", "redlight", "redneck", "reefer", "reestie", "refugee", "reject", "remains", "rentafuck", "republican", "rere", "retard", "retarded", "ribbed", "rigger", "rimjob", "rimming", "roach", "robber", "roundeye", "rump", "russki", "russkie", "sadis", "sadom", "samckdaddy", "sandm", "sandnigger", "satan", "scag", "scallywag", "scat", "schlong", "screw", "screwyou", "scrotum", "scum", "semen", "seppo", "servant", "sex", "sexed", "sexfarm", "sexhound", "sexhouse", "sexing", "sexkitten", "sexpot", "sexslave", "sextogo", "sextoy", "sextoys", "sexual", "sexually", "sexwhore", "sexy", "sexymoma", "sexy-slim", "shag", "shaggin", "shagging", "shat", "shav", "shawtypimp", "sheeney", "shhit", "shinola", "shit", "shitcan", "shitdick", "shite", "shiteater", "shited", "shitface", "shitfaced", "shitfit", "shitforbrains", "shitfuck", "shitfucker", "shitfull", "shithapens", "shithappens", "shithead", "shithouse", "shiting", "shitlist", "shitola", "shitoutofluck", "shits", "shitstain", "shitted", "shitter", "shitting", "shitty ", "shoot", "shooting", "shortfuck", "showtime", "sick", "sissy", "sixsixsix", "sixtynine", "sixtyniner", "skank", "skankbitch", "skankfuck", "skankwhore", "skanky", "skankybitch", "skankywhore", "skinflute", "skum", "skumbag", "slant", "slanteye", "slapper", "slaughter", "slav", "slave", "slavedriver", "sleezebag", "sleezeball", "slideitin", "slime", "slimeball", "slimebucket", "slopehead", "slopey", "slopy", "slut", "sluts", "slutt", "slutting", "slutty", "slutwear", "slutwhore", 
"smack", "smackthemonkey", "smut", "snatch", "snatchpatch", "snigger", "sniggered", "sniggering", "sniggers", "snigger's", "sniper", "snot", "snowback", "snownigger", "sob", "sodom", "sodomise", "sodomite", "sodomize", "sodomy", "sonofabitch", "sonofbitch", "sooty", "sos", "soviet", "spaghettibender", "spaghettinigger", "spank", "spankthemonkey", "sperm", "spermacide", "spermbag", "spermhearder", "spermherder", "spic", "spick", "spig", "spigotty", "spik", "spit", "spitter", "splittail", "spooge", "spreadeagle", "spunk", "spunky", "squaw", "stagg", "stiffy", "strapon", "stringer", "stripclub", "stroke", "stroking", "stupid", "stupidfuck", "stupidfucker", "suck", "suckdick", "sucker", "suckme", "suckmyass", "suckmydick", "suckmytit", "suckoff", "suicide", "swallow", "swallower", "swalow", "swastika", "sweetness", "syphilis", "taboo", "taff", "tampon", "tang", "tantra", "tarbaby", "tard", "teat", "terror", "terrorist", "teste", "testicle", "testicles", "thicklips", "thirdeye", "thirdleg", "threesome", "threeway", "timbernigger", "tinkle", "tit", "titbitnipply", "titfuck", "titfucker", "titfuckin", "titjob", "titlicker", "titlover", "tits", "tittie", "titties", "titty", "tnt", "toilet", "tongethruster", "tongue", "tonguethrust", "tonguetramp", "tortur", "torture", "tosser", "towelhead", "trailertrash", "tramp", "trannie", "tranny", "transexual", "transsexual", "transvestite", "triplex", "trisexual", "trojan", "trots", "tuckahoe", "tunneloflove", "turd", "turnon", "twat", "twink", "twinkie", "twobitwhore", "uck", "uk", "unfuckable", "upskirt", "uptheass", "upthebutt", "urinary", "urinate", "urine", "usama", "uterus", "vagina", "vaginal", "vatican", "vibr", "vibrater", "vibrator", "vietcong", "violence", "virgin", "virginbreaker", "vomit", "vulva", "wab", "wank", "wanker", "wanking", "waysted", "weapon", "weenie", "weewee", "welcher", "welfare", "wetb", "wetback", "wetspot", "whacker", "whash", "whigger", "whiskey", "whiskeydick", "whiskydick", "whit", "whitenigger", 
"whites", "whitetrash", "whitey", "whiz", "whop", "whore", "whorefucker", "whorehouse", "wigger", "willie", "williewanker", "willy", "wn", "wog", "women's", "wop", "wtf", "wuss", "wuzzie", "xtc", "xxx", "yankee", "yellowman", "zigabo", "zipperhead"};
    /** Mutable copy of the array returned by {@code luisVonAhnWordlist()}. */
    private final List<String> tokens =
            new ArrayList<>(Arrays.asList(luisVonAhnWordlist()));

    /** All entries of {@code tokens()} joined with {@code |} into a single alternation. */
    private final String patternString =
            StringUtils.join(tokens(), "|");

    /** The alternation from {@code patternString()}, compiled without match flags. */
    private final Pattern pattern_badWord =
            Pattern.compile(patternString());
    /** Mutable copy of the array returned by {@code luisVonAhnWordlist()}. */
    private final List<String> tokens_containbadword =
            new ArrayList<>(Arrays.asList(luisVonAhnWordlist()));

    /**
     * Expression that accepts any text containing one of the listed words delimited by
     * word boundaries: the joined alternation wrapped in a leading and trailing
     * "any characters" segment.
     */
    private final String patternString_containBadword =
            ".*\\b(" + StringUtils.join(tokens_containbadword(), "|") + ")\\b.*";

    /**
     * Compiled form of {@link #patternString_containBadword}.
     *
     * <p>Previously this compiled {@code patternString()} (the bare alternation), which
     * left the wrapped expression above unused and made this pattern differ from the
     * sibling ban-builder pattern, which compiles its own wrapped string. The field is
     * read directly because it is guaranteed to be initialized at this point.
     *
     * <p>The flag expression evaluates to 226: case-insensitive matching, Unicode case
     * folding, dot-matches-all and canonical equivalence.
     */
    private final Pattern pattern_containBadword = Pattern.compile(
            this.patternString_containBadword,
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.UNICODE_CASE | Pattern.CANON_EQ);

    /**
     * Reusable matcher created from {@code pattern_containBadword()}, initially
     * positioned over the empty string.
     */
    private final Matcher matcher_ContainBadWord =
            pattern_containBadword().matcher("");
    private final String[] BanBuilderWordlist = {"$#!+", "$1ut", "$h1t", "$hit", "$lut", "'ho", "'hobag", "a$$", "anal", "anus", "ass", "assmunch", "b1tch", "ballsack", "bastard", "beaner", "beastiality", "biatch", "beeyotch", "bitch", "bitchy", "blow job", "blow me", "blowjob", "bollock", "bollocks", "bollok", "boner", "boob", "bugger", "buttplug", "c-0-c-k", "c-o-c-k", "c-u-n-t", "c.0.c.k", "c.o.c.k.", "c.u.n.t", "jerk", "jackoff", "jackhole", "j3rk0ff", "homo", "hom0", "hobag", "hell", "h0mo", "h0m0", "goddamn", "goddammit", "godamnit", "god damn", "ghey", "ghay", "gfy", "gay", "fudgepacker", "fudge packer", "fuckwad", "fucktard", "fuckoff", "fucker", "fuck-tard", "fuck off", "fuck", "fellatio", "fellate", "felching", "felcher", "felch", "fartknocker", "fart", "fannybandit", "fanny bandit", "faggot", "fagg", "fag", "f.u.c.k", "f-u-c-k", "f u c k", "dyke", "douchebag", "douche", "douch3", "doosh", "dildo", "dike", "dick", "damnit", "damn", "dammit", "d1ldo", "d1ld0", "d1ck", "d0uche", "d0uch3", "cunt", "cumstain", "cum", "crap", "coon", "cock", "clitoris", "clit", "cl1t", "cawk", "c0ck", "jerk0ff", "jerkoff", "jizz", "knob end", "knobend", "labia", "lmfao", "moolie", "muff", "nigga", "nigger", "p.u.s.s.y.", "penis", "piss", "piss-off", "pissoff", "prick", "pube", "pussy", "queer", "retard", "retarded", "s hit", "s-h-1-t", "s-h-i-t", "s.h.i.t.", "scrotum", "sex", "sh1t", "shit", "slut", "smegma", "t1t", "tard", "terd", "tit", "tits", "titties", "turd", "twat", "vag", "vagina", "wank", "wetback", "whore", "whoreface", "F*ck", "sh*t", "pu$$y", "p*ssy", "diligaf", "wtf", "stfu", "fu*ck", "fack", "shite", "fxck", "sh!t", "@sshole", "assh0le", "assho!e", "a$$hole", "a$$h0le", "a$$h0!e", "a$$h01e", "assho1e", "wh0re", "f@g", "f@gg0t", "f@ggot", "motherf*cker", "mofo", "cuntlicker", "cuntface", "dickbag", "douche waffle", "jizz bag", "cockknocker", "beatch", "fucknut", "nucking futs", "mams", "carpet muncher", "ass munch", "ass hat", "cunny", "quim", "clitty", "fuck 
wad", "kike", "spic", "wop", "chink", "wet back", "mother humper", "feltch", "feltcher", "FvCk", "ahole", "nads", "spick", "douchey", "Bullturds", "gonads", "bitch", "butt", "fellatio", "lmao", "s-o-b", "spunk", "he11", "jizm", "jism", "bukkake", "shiz", "wigger", "gook", "ritard", "reetard", "masterbate", "masturbate", "goatse", "masterbating", "masturbating", "hitler", "nazi", "tubgirl", "GTFO", "FOAD", "r-tard", "rtard", "hoor", "g-spot", "gspot", "vulva", "assmaster", "viagra", "Phuck", "frack", "fuckwit", "assbang", "assbanged", "assbangs", "asshole", "assholes", "asswipe", "asswipes", "b1tch", "bastards", "bitched", "bitches", "blow jobs", "boners", "bullshit", "bullshits", "bullshitted", "cameltoe", "camel toe", "camel toes", "chinc", "chincs", "chink", "chode", "chodes", "clit", "clits", "cocks", "coons", "cumming", "cunts", "d1ck", "dickhead", "dickheads", "doggie-style", "dildos", "douchebags", "dumass", "dumb ass", "dumbasses", "dykes", "f-u-c-k", "faggit", "fags", "fucked", "fucker", "fuckface", "fucks", "godamnit", "gooks", "humped", "humping", "jackass", "jap", "japs", "jerk off", "jizzed", "kikes", "knobend", "kooch", "kooches", "kootch", "mother fucker", "mother fuckers", "motherfucking", "niggah", "niggas", "niggers", "p.u.s.s.y.", "porch monkey", "porch monkeys", "pussies", "queers", "rim job", "rim jobs", "sand nigger", "sand niggers", "s0b", "shitface", "shithead", "shits", "shitted", "s.o.b.", "spik", "spiks", "twats", "whack off", "whores", "zoophile", "m-fucking", "mthrfucking", "muthrfucking", "mutherfucking", "mutherfucker", "mtherfucker", "mthrfucker", "mthrf*cker", "whorehopper", "maternal copulator", "(!)", "whoralicious", "whorealicious", "( Y )", "(@ Y @)", "(. 
Y .)", "aeolus", "Analprobe", "Areola", "areole", "aryan", "arian", "asses", "assfuck", "azazel", "baal", "Babes", "bang", "banger", "Barf", "bawdy", "Beardedclam", "beater", "Beaver", "beer", "bigtits", "bimbo", "Blew", "blow", "blowjobs", "blowup", "bod", "bodily", "boink", "Bone", "boned", "bong", "Boobies", "Boobs", "booby", "booger", "Bookie", "Booky", "bootee", "bootie", "Booty", "Booze", "boozer", "boozy", "bosom", "bosomy", "bowel", "bowels", "bra", "Brassiere", "breast", "breasts", "bung", "babe", "bush", "buttfuck", "cocaine", "kinky", "klan", "panties", "pedophile", "pedophilia", "pedophiliac", "punkass", "queaf", "rape", "scantily", "essohbee", "shithouse", "smut", "snatch", "toots", "doggie style", "anorexia", "bulimia", "bulimiic", "burp", "busty", "Buttfucker", "caca", "cahone", "Carnal", "Carpetmuncher", "cervix", "climax", "Cocain", "Cocksucker", "Coital", "coke", "commie", "condom", "corpse", "Coven", "Crabs", "crack", "Crackwhore", "crappy", "cuervo", "Cummin", "Cumshot", "cumshots", "Cunnilingus", "dago", "dagos", "damned", "dick-ish", "dickish", "Dickweed", "anorexic", "prostitute", "marijuana", "LSD", "PCP", "diddle", "dawgie-style", "dimwit", "dingle", "doofus", "dopey", "douche", "Drunk", "Dummy", "Ejaculate", "enlargement", "erect", "erotic", "exotic", "extacy", "Extasy", "faerie", "faery", "fagged", "fagot", "Fairy", "fisted", "fisting", "Fisty", "floozy", "fondle", "foobar", "foreskin", "frigg", "frigga", "fubar", "Fucking", "fuckup", "ganja", "gays", "glans", "godamn", "goddam", "Goldenshower", "gonad", "gonads", "Handjob", "hebe", "hemp", "heroin", "herpes", "hijack", "Hiv", "Homey", "Honky", "hooch", "hookah", "Hooker", "Hootch", "hooter", "hooters", "hump", "hussy", "hymen", "inbred", "incest", "injun", "jerked", "Jiz", "Jizm", "horny", "junkie", "junky", "kill", "kkk", "kraut", "kyke", "lech", "leper", "lesbians", "lesbos", "Lez", "Lezbian", "lezbians", "Lezbo", "Lezbos", "Lezzie", "Lezzies", "Lezzy", "loin", "loins", "lube", "Lust", 
"lusty", "Massa", "Masterbation", "Masturbation", "maxi", "Menses", "Menstruate", "Menstruation", "meth", "molest", "moron", "Motherfucka", "Motherfucker", "murder", "Muthafucker", "nad", "naked", "napalm", "Nappy", "nazism", "negro", "niggle", "nimrod", "ninny", "Nipple", "nooky", "Nympho", "Opiate", "opium", "oral", "orally", "organ", "orgasm", "orgies", "orgy", "ovary", "ovum", "ovums", "Paddy", "pantie", "panty", "Pastie", "pasty", "Pecker", "pedo", "pee", "Peepee", "Penetrate", "Penetration", "penial", "penile", "perversion", "peyote", "phalli", "Phallic", "Pillowbiter", "pimp", "pinko", "pissed", "pms", "polack", "porn", "porno", "pornography", "pot", "potty", "prig", "prude", "pubic", "pubis", "punky", "puss", "Queef", "quicky", "Racist", "racy", "raped", "Raper", "rapist", "raunch", "rectal", "rectum", "rectus", "reefer", "reich", "revue", "risque", "rum", "rump", "sadism", "sadist", "satan", "scag", "schizo", "screw", "Screwed", "scrog", "Scrot", "Scrote", "scrud", "scum", "seaman", "seamen", "seduce", "semen", "sex_story", "sexual", "Shithole", "Shitter", "shitty", "s*o*b", "sissy", "skag", "slave", "sleaze", "sleazy", "sluts", "smutty", "sniper", "snuff", "sodom", "souse", "soused", "sperm", "spooge", "Stab", "steamy", "Stiffy", "stoned", "strip", "Stroke", "whacking off", "suck", "sucked", "sucking", "tampon", "tawdry", "teat", "teste", "testee", "testes", "Testis", "thrust", "thug", "tinkle", "Titfuck", "titi", "titty", "whacked off", "toke", "tramp", "trashy", "tush", "undies", "unwed", "urinal", "urine", "uterus", "uzi", "valium", "virgin", "vixen", "vodka", "vomit", "voyeur", "vulgar", "wad", "wazoo", "wedgie", "weed", "weenie", "weewee", "weiner", "weirdo", "wench", "whitey", "whiz", "Whored", "Whorehouse", "Whoring", "womb", "woody", "x-rated", "xxx", "B@lls", "yeasty", "yobbo", "sumofabiatch", "doggy-style", "doggy style", "wang", "dong", "d0ng", "w@ng", "wh0reface", "wh0ref@ce", "wh0r3f@ce", "tittyfuck", "tittyfucker", "tittiefucker", 
"cockholster", "cockblock", "gai", "gey", "faig", "faigt", "a55", "a55hole", "gae", "corksucker", "rumprammer", "slutdumper", "niggaz", "muthafuckaz", "gigolo", "pussypounder", "herp", "herpy", "transsexual", "gender dysphoria", "orgasmic", "cunilingus", "anilingus", "dickdipper", "dickwhipper", "dicksipper", "dickripper", "dickflipper", "dickzipper", "homoey", "queero", "freex", "cunthunter", "shamedame", "slutkiss", "shiteater", "slut devil", "fuckass", "fucka$$", "clitorus", "assfucker", "dillweed", "cracker", "teabagging", "shitt", "azz", "fuk", "fucknugget", "cuntlick", "g@y", "@ss", "beotch"};
    // Mutable list copy of the BanBuilder word list, used to build the "contains a banned word" pattern.
    private final List<String> tokens_banBuilder = new ArrayList(Arrays.asList(BanBuilderWordlist()));
    // Regex ".*\b(w1|w2|...)\b.*": the words are joined verbatim with "|" (no Pattern.quote),
    // so any regex metacharacters inside a list entry are interpreted as regex syntax.
    private final String patternString_banBuilder = new StringBuilder(10).append(".*\\b(").append(StringUtils.join(tokens_banBuilder(), "|")).append(")\\b.*").toString();
    // 226 == Pattern.CASE_INSENSITIVE (2) | Pattern.DOTALL (32) | Pattern.UNICODE_CASE (64) | Pattern.CANON_EQ (128).
    private final Pattern pattern_banBuilder = Pattern.compile(patternString_banBuilder(), 226);
    // Single reusable matcher; callers reset(...) it with their own input before matching.
    private final Matcher matcher_BanBuilder = pattern_banBuilder().matcher("");
    // Second copy of the same BanBuilder word list, used for the per-token ban-word ratio.
    private final List<String> tokens_ban = new ArrayList(Arrays.asList(BanBuilderWordlist()));
    // Bare alternation "w1|w2|..." with no surrounding ".*" or word boundaries.
    private final String patternString_ban = StringUtils.join(tokens_ban(), "|");
    // NOTE(review): compiled without flags, i.e. case-sensitive, while banWordRatio lower-cases its
    // input first; list entries containing upper-case letters therefore look unmatchable - confirm intent.
    private final Pattern pattern_banWord = Pattern.compile(patternString_ban());
    private final String regex_containLanguageWord = ".*(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?\n    |isc?h)|rmenian?|ssamese|azeri|z[e\\\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\\\u043d\\\\u0433\\\\u043b\\\\u0438\\\\u0439\n    \\\\u0441\\\\u043a\\\\u0438\\\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li\n    |elarusian?|okm\\\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?\n    |zech|roat([eo]|ian?)|atal[a\\\\u00e0]n?|\\\\u0440\\\\u043f\\\\u0441\\\\u043a\\\\u0438|antonese)|[c\\\\u010d](esky|e[s\\\\u0161]tina)\n    \\r\\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\\\u00f1]h?i?ol|nisc?h)|speranto\n    |stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)\n    |uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?\n    |ndonesian?|ngl[e\\\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?\n    |sova)|urd[i\\\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\\\u0161]u))|ietuvi[u\\\\u0173]|ithuanian?)|m(a[ck]edon(ian?|ski)\n    |agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali\n    |orw(ay|egian)|orsk( bokm[a\\\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\\\u00ea]se?\n    (( d[eo])? 
brasil(eiro)?| ?\\\\(brasil\\\\))?|unjabi)|r(om[a\\\\u00e2i]ni?[a\\\\u0103]n?|um(ano|\\\\u00e4nisch)|ussi([ao]n?\n    |sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\\\u0161?[c\\\\u010d]ina|en(e|ij?an?)|uomi)|erbisch\n    |pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\\\u1ebf]ng vi[e\\\\u1ec7]t\n    |[u\\\\u00fc]rk([c\\\\u00e7]e|isc?h|i\\\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\\\u0430\\\\u043d\\\\u0433\n    \\\\u043b\\\\u0438\\\\u0438\\\\u0441|[k\\\\u043a]\\\\u0430\\\\u043b\\\\u043c\\\\u044b\\\\u043a\\\\u0441|[k\\\\u043a]\\\\u0430\\\\u0437\\\\u0430\n    \\\\u0445\\\\u0441|\\\\u043d\\\\u0435\\\\u043c\\\\u0435\\\\u0446|[p\\\\u0440]\\\\u0443\\\\u0441\\\\u0441|[y\\\\u0443]\\\\u0437\\\\u0431\\\\u0435\n    \\\\u043a\\\\u0441)\\\\u043a\\\\u0438\\\\u0439( \\\\u044f\\\\u0437\\\\u044b\\\\u043a)??|\\\\u05e2\\\\u05d1\\\\u05e8\\\\u05d9\\\\u05ea|[k\\\\u043a\n    \\\\u049b](\\\\u0430\\\\u0437\\\\u0430[\\\\u043a\\\\u049b]\\\\u0448\\\\u0430|\\\\u044b\\\\u0440\\\\u0433\\\\u044b\\\\u0437\\\\u0447\\\\u0430|\n    \\\\u0438\\\\u0440\\\\u0438\\\\u043b\\\\u043b)|\\\\u0443\\\\u043a\\\\u0440\\\\u0430\\\\u0457\\\\u043d\\\\u0441\\\\u044c\\\\u043a(\\\\u0430|\n    \\\\u043e\\\\u044e)|\\\\u0431(\\\\u0435\\\\u043b\\\\u0430\\\\u0440\\\\u0443\\\\u0441\\\\u043a\\\\u0430\\\\u044f|\\\\u044a\\\\u043b\\\\u0433\n    \\\\u0430\\\\u0440\\\\u0441\\\\u043a\\\\u0438( \\\\u0435\\\\u0437\\\\u0438\\\\u043a)?)|\\\\u03b5\\\\u03bb\\\\u03bb[\\\\u03b7\\\\u03b9]\n    \\\\u03bd\\\\u03b9\\\\u03ba(\\\\u03ac|\\\\u03b1)|\\\\u10e5\\\\u10d0\\\\u10e0\\\\u10d7\\\\u10e3\\\\u10da\\\\u10d8|\\\\u0939\\\\u093f\\\\u0928\n    \\\\u094d\\\\u0926\\\\u0940|\\\\u0e44\\\\u0e17\\\\u0e22|[m\\\\u043c]\\\\u043e\\\\u043d\\\\u0433\\\\u043e\\\\u043b(\\\\u0438\\\\u0430)?|([c\n    \\\\u0441]\\\\u0440\\\\u043f|[m\\\\u043c]\\\\u0430\\\\u043a\\\\u0435\\\\u0434\\\\u043e\\\\u043d)\\\\u0441\\\\u043a\\\\u0438|\\\\u0627\\\\u0644\n    
\\\\u0639\\\\u0631\\\\u0628\\\\u064a\\\\u0629|\\\\u65e5\\\\u672c\\\\u8a9e|\\\\ud55c\\\\uad6d(\\\\ub9d0|\\\\uc5b4)|\\\\u200c\\\\u0939\\\\u093f\n    \\\\u0928\\\\u0926\\\\u093c\\\\u093f|\\\\u09ac\\\\u09be\\\\u0982\\\\u09b2\\\\u09be|\\\\u0a2a\\\\u0a70\\\\u0a1c\\\\u0a3e\\\\u0a2c\\\\u0a40|\n    \\\\u092e\\\\u0930\\\\u093e\\\\u0920\\\\u0940|\\\\u0c95\\\\u0ca8\\\\u0ccd\\\\u0ca8\\\\u0ca1|\\\\u0627\\\\u064f\\\\u0631\\\\u062f\\\\u064f\\\\u0648\n    |\\\\u0ba4\\\\u0bae\\\\u0bbf\\\\u0bb4\\\\u0bcd|\\\\u0c24\\\\u0c46\\\\u0c32\\\\u0c41\\\\u0c17\\\\u0c41|\\\\u0a97\\\\u0ac1\\\\u0a9c\\\\u0ab0\\\\u0abe\n    \\\\u0aa4\\\\u0ac0|\\\\u0641\\\\u0627\\\\u0631\\\\u0633\\\\u06cc|\\\\u067e\\\\u0627\\\\u0631\\\\u0633\\\\u06cc|\\\\u0d2e\\\\u0d32\\\\u0d2f\\\\u0d3e\n    \\\\u0d33\\\\u0d02|\\\\u067e\\\\u069a\\\\u062a\\\\u0648|\\\\u1019\\\\u103c\\\\u1014\\\\u103a\\\\u1019\\\\u102c\\\\u1018\\\\u102c\\\\u101e\\\\u102c\n    |\\\\u4e2d\\\\u6587(\\\\u7b80\\\\u4f53|\\\\u7e41\\\\u9ad4)?|\\\\u4e2d\\\\u6587\\\\uff08(\\\\u7b80\\\\u4f53?|\\\\u7e41\\\\u9ad4)\\\\uff09\n    |\\\\u7b80\\\\u4f53|\\\\u7e41\\\\u9ad4).*";
    // Compiled form of regex_containLanguageWord (default flags).
    private final Pattern pattern_forContainLanguageWord = Pattern.compile(regex_containLanguageWord());
    // Single reusable matcher; containLanguageBadWordWord resets it with each input.
    private final Matcher matcher_containLanguageWord = pattern_forContainLanguageWord().matcher("");
    private final String[] MaleNames = {"AARON", "ADAM", "ADRIAN", "ALAN", "ALBERT", "ALBERTO", "ALEX", "ALEXANDER", "ALFRED", "ALFREDO", "ALLAN", "ALLEN", "ALVIN", "ANDRE", "ANDREW", "ANDY", "ANGEL", "ANTHONY", "ANTONIO", "ARMANDO", "ARNOLD", "ARTHUR", "BARRY", "BEN", "BENJAMIN", "BERNARD", "BILL", "BILLY", "BOB", "BOBBY", "BRAD", "BRADLEY", "BRANDON", "BRENT", "BRETT", "BRIAN", "BRUCE", "BRYAN", "BYRON", "CALVIN", "CARL", "CARLOS", "CASEY", "CECIL", "CHAD", "CHARLES", "CHARLIE", "CHESTER", "CHRIS", "CHRISTIAN", "CHRISTOPHER", "CLARENCE", "CLAUDE", "CLAYTON", "CLIFFORD", "CLIFTON", "CLINTON", "CLYDE", "CODY", "COREY", "CORY", "CRAIG", "CURTIS", "DALE", "DAN", "DANIEL", "DANNY", "DARRELL", "DARREN", "DARRYL", "DARYL", "DAVE", "DAVID", "DEAN", "DENNIS", "DEREK", "DERRICK", "DON", "DONALD", "DOUGLAS", "DUANE", "DUSTIN", "DWAYNE", "DWIGHT", "EARL", "EDDIE", "EDGAR", "EDUARDO", "EDWARD", "EDWIN", "ELMER", "ENRIQUE", "ERIC", "ERIK", "ERNEST", "EUGENE", "EVERETT", "FELIX", "FERNANDO", "FLOYD", "FRANCIS", "FRANCISCO", "FRANK", "FRANKLIN", "FRED", "FREDDIE", "FREDERICK", "GABRIEL", "GARY", "GENE", "GEORGE", "GERALD", "GILBERT", "GLEN", "GLENN", "GORDON", "GREG", "GREGORY", "GUY", "HAROLD", "HARRY", "HARVEY", "HECTOR", "HENRY", "HERBERT", "HERMAN", "HOWARD", "HUGH", "IAN", "ISAAC", "IVAN", "JACK", "JACOB", "JAIME", "JAMES", "JAMIE", "JARED", "JASON", "JAVIER", "JAY", "JEFF", "JEFFERY", "JEFFREY", "JEREMY", "JEROME", "JERRY", "JESSE", "JESSIE", "JESUS", "JIM", "JIMMIE", "JIMMY", "JOE", "JOEL", "JOHN", "JOHNNIE", "JOHNNY", "JON", "JONATHAN", "JORDAN", "JORGE", "JOSE", "JOSEPH", "JOSHUA", "JUAN", "JULIAN", "JULIO", "JUSTIN", "KARL", "KEITH", "KELLY", "KEN", "KENNETH", "KENT", "KEVIN", "KIRK", "KURT", "KYLE", "LANCE", "LARRY", "LAWRENCE", "LEE", "LEO", "LEON", "LEONARD", "LEROY", "LESLIE", "LESTER", "LEWIS", "LLOYD", "LONNIE", "LOUIS", "LUIS", "MANUEL", "MARC", "MARCUS", "MARIO", "MARION", "MARK", "MARSHALL", "MARTIN", "MARVIN", "MATHEW", "MATTHEW", "MAURICE", "MAX", "MELVIN", 
"MICHAEL", "MICHEAL", "MIGUEL", "MIKE", "MILTON", "MITCHELL", "MORRIS", "NATHAN", "NATHANIEL", "NEIL", "NELSON", "NICHOLAS", "NORMAN", "OSCAR", "PATRICK", "PAUL", "PEDRO", "PERRY", "PETER", "PHILIP", "PHILLIP", "RAFAEL", "RALPH", "RAMON", "RANDALL", "RANDY", "RAUL", "RAY", "RAYMOND", "REGINALD", "RENE", "RICARDO", "RICHARD", "RICK", "RICKY", "ROBERT", "ROBERTO", "RODNEY", "ROGER", "ROLAND", "RON", "RONALD", "RONNIE", "ROSS", "ROY", "RUBEN", "RUSSELL", "RYAN", "SALVADOR", "SAM", "SAMUEL", "SCOTT", "SEAN", "SERGIO", "SETH", "SHANE", "SHAWN", "SIDNEY", "STANLEY", "STEPHEN", "STEVE", "STEVEN", "TED", "TERRANCE", "TERRENCE", "TERRY", "THEODORE", "THOMAS", "TIM", "TIMOTHY", "TODD", "TOM", "TOMMY", "TONY", "TRACY", "TRAVIS", "TROY", "TYLER", "TYRONE", "VERNON", "VICTOR", "VINCENT", "VIRGIL", "WADE", "WALLACE", "WALTER", "WARREN", "WAYNE", "WESLEY", "WILLARD", "WILLIAM", "WILLIE", "ZACHARY"};
    // Mutable list copy of the MaleNames array.
    private final List<String> tokens_maleName = new ArrayList(Arrays.asList(MaleNames()));
    // Regex ".*\b(NAME1|NAME2|...)\b.*" built by joining the names verbatim with "|".
    private final String patternString_MaleName = new StringBuilder(10).append(".*\\b(").append(StringUtils.join(tokens_maleName(), "|")).append(")\\b.*").toString();
    // 226 == Pattern.CASE_INSENSITIVE (2) | Pattern.DOTALL (32) | Pattern.UNICODE_CASE (64) | Pattern.CANON_EQ (128).
    private final Pattern pattern_MaleName = Pattern.compile(patternString_MaleName(), 226);
    // Single reusable matcher; maleNameWord resets it with each input.
    private final Matcher matcher_MaleName = pattern_MaleName().matcher("");
    private final String[] FemaleNames = {"AGNES", "ALICE", "ALICIA", "ALLISON", "ALMA", "AMANDA", "AMBER", "AMY", "ANA", "ANDREA", "ANGELA", "ANITA", "ANN", "ANNA", "ANNE", "ANNETTE", "ANNIE", "APRIL", "ARLENE", "ASHLEY", "AUDREY", "BARBARA", "BEATRICE", "BECKY", "BERNICE", "BERTHA", "BESSIE", "BETH", "BETTY", "BEVERLY", "BILLIE", "BOBBIE", "BONNIE", "BRANDY", "BRENDA", "BRITTANY", "CARLA", "CARMEN", "CAROL", "CAROLE", "CAROLINE", "CAROLYN", "CARRIE", "CASSANDRA", "CATHERINE", "CATHY", "CHARLENE", "CHARLOTTE", "CHERYL", "CHRISTINA", "CHRISTINE", "CHRISTY", "CINDY", "CLAIRE", "CLARA", "CLAUDIA", "COLLEEN", "CONNIE", "CONSTANCE", "COURTNEY", "CRYSTAL", "CYNTHIA", "DAISY", "DANA", "DANIELLE", "DARLENE", "DAWN", "DEANNA", "DEBBIE", "DEBORAH", "DEBRA", "DELORES", "DENISE", "DIANA", "DIANE", "DIANNE", "DOLORES", "DONNA", "DORA", "DORIS", "DOROTHY", "EDITH", "EDNA", "EILEEN", "ELAINE", "ELEANOR", "ELIZABETH", "ELLA", "ELLEN", "ELSIE", "EMILY", "EMMA", "ERICA", "ERIKA", "ERIN", "ESTHER", "ETHEL", "EVA", "EVELYN", "FELICIA", "FLORENCE", "FRANCES", "GAIL", "GEORGIA", "GERALDINE", "GERTRUDE", "GINA", "GLADYS", "GLENDA", "GLORIA", "GRACE", "GWENDOLYN", "HAZEL", "HEATHER", "HEIDI", "HELEN", "HILDA", "HOLLY", "IDA", "IRENE", "IRMA", "JACKIE", "JACQUELINE", "JAMIE", "JANE", "JANET", "JANICE", "JEAN", "JEANETTE", "JEANNE", "JENNIE", "JENNIFER", "JENNY", "JESSICA", "JESSIE", "JILL", "JO", "JOAN", "JOANN", "JOANNE", "JOSEPHINE", "JOY", "JOYCE", "JUANITA", "JUDITH", "JUDY", "JULIA", "JULIE", "JUNE", "KAREN", "KATHERINE", "KATHLEEN", "KATHRYN", "KATHY", "KATIE", "KATRINA", "KAY", "KELLY", "KIM", "KIMBERLY", "KRISTEN", "KRISTIN", "KRISTINA", "LAURA", "LAUREN", "LAURIE", "LEAH", "LENA", "LEONA", "LESLIE", "LILLIAN", "LILLIE", "LINDA", "LISA", "LOIS", "LORETTA", "LORI", "LORRAINE", "LOUISE", "LUCILLE", "LUCY", "LYDIA", "LYNN", "MABEL", "MAE", "MARCIA", "MARGARET", "MARGIE", "MARIA", "MARIAN", "MARIE", "MARILYN", "MARION", "MARJORIE", "MARLENE", "MARSHA", "MARTHA", "MARY", "MATTIE", 
"MAUREEN", "MAXINE", "MEGAN", "MELANIE", "MELINDA", "MELISSA", "MICHELE", "MICHELLE", "MILDRED", "MINNIE", "MIRIAM", "MISTY", "MONICA", "MYRTLE", "NANCY", "NAOMI", "NATALIE", "NELLIE", "NICOLE", "NINA", "NORA", "NORMA", "OLGA", "PAMELA", "PATRICIA", "PATSY", "PAULA", "PAULINE", "PEARL", "PEGGY", "PENNY", "PHYLLIS", "PRISCILLA", "RACHEL", "RAMONA", "REBECCA", "REGINA", "RENEE", "RHONDA", "RITA", "ROBERTA", "ROBIN", "ROSA", "ROSE", "ROSEMARY", "RUBY", "RUTH", "SALLY", "SAMANTHA", "SANDRA", "SARA", "SARAH", "SHANNON", "SHARON", "SHEILA", "SHELLY", "SHERRI", "SHERRY", "SHIRLEY", "SONIA", "STACEY", "STACY", "STELLA", "STEPHANIE", "SUE", "SUSAN", "SUZANNE", "SYLVIA", "TAMARA", "TAMMY", "TANYA", "TARA", "TERESA", "TERRI", "TERRY", "THELMA", "THERESA", "TIFFANY", "TINA", "TONI", "TONYA", "TRACEY", "TRACY", "VALERIE", "VANESSA", "VELMA", "VERA", "VERONICA", "VICKI", "VICKIE", "VICTORIA", "VIOLA", "VIOLET", "VIRGINIA", "VIVIAN", "WANDA", "WENDY", "WILLIE", "WILMA", "YOLANDA", "YVONNE"};
    // Mutable list copy of the FemaleNames array.
    private final List<String> tokens_FemaleName = new ArrayList(Arrays.asList(FemaleNames()));
    // Regex ".*\b(NAME1|NAME2|...)\b.*" built by joining the names verbatim with "|".
    private final String patternString_FemaleName = new StringBuilder(10).append(".*\\b(").append(StringUtils.join(tokens_FemaleName(), "|")).append(")\\b.*").toString();
    // 226 == Pattern.CASE_INSENSITIVE (2) | Pattern.DOTALL (32) | Pattern.UNICODE_CASE (64) | Pattern.CANON_EQ (128).
    private final Pattern pattern_FeMaleName = Pattern.compile(patternString_FemaleName(), 226);
    // Single reusable matcher; femaleNameWord resets it with each input.
    private final Matcher matcher_FeMaleName = pattern_FeMaleName().matcher("");
    private final String[] StopWord = {"a's", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "c'mon", "c's", "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain", "certainly", "changes", "clearly", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "currently", "definitely", "described", "despite", "did", "didn't", "different", "do", "does", "doesn't", "doing", "don't", "done", "down", "downwards", "during", "each", "edu", "eg", "eight", "either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "far", "few", "fifth", "first", "five", "followed", "following", "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi", "him", "himself", "his", "hither", "hopefully", 
"how", "howbeit", "however", "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "it's", "its", "itself", "just", "keep", "keeps", "kept", "know", "knows", "known", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "little", "look", "looking", "looks", "ltd", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone", "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "t's", "take", "taken", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their", "theirs", 
"them", "themselves", "then", "thence", "there", "there's", "thereafter", "thereby", "therefore", "therein", "theres", "thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "twice", "two", "un", "under", "unfortunately", "unless", "unlikely", "until", "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "value", "various", "very", "via", "viz", "vs", "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome", "well", "went", "were", "weren't", "what", "what's", "whatever", "when", "whence", "whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "who's", "whoever", "whole", "whom", "whose", "why", "will", "willing", "wish", "with", "within", "without", "won't", "wonder", "would", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves", "zero", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"};
    // Mutable list copy of the StopWord array.
    private final List<String> tokens_StopWords = new ArrayList(Arrays.asList(StopWord()));
    // Regex ".*\b(w1|w2|...)\b.*" built by joining the stop words verbatim with "|".
    private final String patternString_stopword = new StringBuilder(10).append(".*\\b(").append(StringUtils.join(tokens_StopWords(), "|")).append(")\\b.*").toString();
    // 226 == Pattern.CASE_INSENSITIVE (2) | Pattern.DOTALL (32) | Pattern.UNICODE_CASE (64) | Pattern.CANON_EQ (128).
    private final Pattern pattern_stopwords = Pattern.compile(patternString_stopword(), 226);
    // Single reusable matcher; the stop-word-aware sharing-words helper resets it per token.
    private final Matcher matcher_stopword = pattern_stopwords().matcher("");
    private final String RegexStr = "(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)|rmenian?|ssamese|azeri|z[eə]rba(ijani?|ycan(ca)?|yjan)|нглийский)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okmål|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[aà]n?|рпски|antonese)|[cč](esky|e[sš]tina)|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[nñ]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[cç](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[eê]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[iî])|l(at(in[ao]?|vi(an?|e[sš]u))|ietuvi[uų]|ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[aå]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[eê]se?(( d[eo])?brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[aâi]ni?[aă]n?|um(ano|änisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|enš?[cč]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[eế]ng vi[eệ]t|[uü]rk([cç]e|isc?h|iş|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(англиис|[kк]алмыкс|[kк]азахс|немец|[pр]усс|[yу]збекс|татарс)кий( язык)??|עברית|[kкқ](аза[кқ]ша|ыргызча|ирилл)|українськ(а|ою)|б(еларуская|ългарски( език)?)|ελλ[ηι]νικ(ά|α)|ქართული|हिन्दी|ไทย|[mм]онгол(иа)?|([cс]рп|[mм]акедон)ски|العربية|日本語|한국(말|어)|\u200cहिनद़ि|  বাংলা|ਪੰਜਾਬੀ|मराठी|ಕನ್ನಡ|اُردُو|தமிழ்|తెలుగు|ગુજરાતી|فارسی|پارسی|മലയാളം|پښتو|မြန်မာဘာသာ|中文(简体|繁體)?|中文（(简体?|繁體)）|简体|繁體)";

    /**
     * Builds the word-level feature vector for a piece of text.
     *
     * <p>Layout of the returned array (length 17):
     * <ul>
     *   <li>0 - rounded {@link #languageWordRatioCharacter(String)}</li>
     *   <li>1 - 1.0 if {@link #containLanguageWord(String)}, else 0.0</li>
     *   <li>2 - rounded {@link #lowercaseWordRatio(String)}</li>
     *   <li>3 - {@link #longestWord(String)}, or 0.0 when that is {@code null}</li>
     *   <li>4 - 1.0 if {@link #containURLWord(String)}, else 0.0</li>
     *   <li>5 - rounded {@link #badWordRatio(String)}</li>
     *   <li>6 - rounded {@link #uppercaseWordRatio(String)}</li>
     *   <li>7 - rounded {@link #banWordRatio(String)}</li>
     *   <li>8 - 1.0 if {@link #femaleNameWord(String)}, else 0.0</li>
     *   <li>9 - 1.0 if {@link #maleNameWord(String)}, else 0.0</li>
     *   <li>10 - 1.0 if {@link #containBadWord(String)}, else 0.0</li>
     *   <li>11 - 1.0 if {@link #banBuilderWordListWord(String)}, else 0.0</li>
     *   <li>12-16 - never written by this method; they keep the default 0.0</li>
     * </ul>
     * Ratios that come back as NaN are stored as 0.0.
     *
     * @param str text to analyse
     * @return a new 17-element feature array
     */
    public double[] wordFeatures(String str) {
        double[] features = new double[17];
        features[0] = roundedOrZero(languageWordRatioCharacter(str));
        features[1] = flag(containLanguageWord(str));
        features[2] = roundedOrZero(lowercaseWordRatio(str));
        // Keep the result in a local: the decompiled code referenced an undefined temporary here.
        Integer longest = longestWord(str);
        if (longest != null) {
            features[3] = longest.intValue();
        } else {
            features[3] = 0.0d;
        }
        features[4] = flag(containURLWord(str));
        features[5] = roundedOrZero(badWordRatio(str));
        features[6] = roundedOrZero(uppercaseWordRatio(str));
        features[7] = roundedOrZero(banWordRatio(str));
        features[8] = flag(femaleNameWord(str));
        features[9] = flag(maleNameWord(str));
        features[10] = flag(containBadWord(str));
        features[11] = flag(banBuilderWordListWord(str));
        return features;
    }

    /** Returns 0.0 for NaN, otherwise the ratio passed through {@code Utils.roundDouble}. */
    private static double roundedOrZero(double ratio) {
        if (Double.isNaN(ratio)) {
            return 0.0d;
        }
        return Utils$.MODULE$.roundDouble(ratio);
    }

    /** Encodes a boolean feature as 1.0 (true) or 0.0 (false). */
    private static double flag(boolean present) {
        return present ? 1.0d : 0.0d;
    }

    /**
     * Computes the fraction of whitespace-separated tokens of {@code str} that are
     * matched in full by {@code pattern}.
     *
     * <p>The text is trimmed and split on {@code \s+}. Each token whose trimmed form is
     * non-empty and fully matches the pattern counts as a hit; the hit count is divided
     * by the number of tokens produced by the split (which is at least one, so blank
     * input yields 0.0).
     *
     * @param str     text to analyse; {@code null} yields 0.0, in line with the other
     *                null-tolerant feature methods of this class
     * @param pattern pattern every token is tested against with {@code matches()}
     * @return hits divided by token count
     */
    public double wordRatio(String str, Pattern pattern) {
        if (str == null) {
            return 0.0d;
        }
        String[] words = Pattern.compile("\\s+").split(str.trim());
        // One matcher is reused for every token.
        Matcher matcher = pattern.matcher("");
        double hits = 0.0d;
        for (String word : words) {
            String candidate = word.trim();
            if (!candidate.equals("") && matcher.reset(candidate).matches()) {
                hits++;
            }
        }
        return hits / words.length;
    }

    /** Accessor for the {@code regex_LanguageWordRatio} field. */
    public String regex_LanguageWordRatio() {
        return regex_LanguageWordRatio;
    }

    /** Accessor for the {@code pattern_LanguageWordRatio} field. */
    public Pattern pattern_LanguageWordRatio() {
        return pattern_LanguageWordRatio;
    }

    /**
     * Ratio of tokens in {@code str} matched by the language-word-ratio pattern.
     *
     * @param str text to analyse
     * @return result of {@link #wordRatio(String, Pattern)} for that pattern
     */
    public double languageWordRatioCharacter(String str) {
        Pattern languagePattern = pattern_LanguageWordRatio();
        return wordRatio(str, languagePattern);
    }

    /** Accessor for the {@code regex_ContainLanguageWord} field. */
    public String regex_ContainLanguageWord() {
        return regex_ContainLanguageWord;
    }

    /** Accessor for the {@code pattern_ContainLanguageWord} field. */
    public Pattern pattern_ContainLanguageWord() {
        return pattern_ContainLanguageWord;
    }

    /** Accessor for the shared {@code matcher_ContainLanguageWord} instance (not a copy). */
    public Matcher matcher_ContainLanguageWord() {
        return matcher_ContainLanguageWord;
    }

    /**
     * Tells whether the whole text matches the contain-language-word pattern.
     *
     * <p>The text is trimmed and lower-cased before matching. Lower-casing uses
     * {@code Locale.ROOT} so the result does not depend on the JVM default locale
     * (under a Turkish locale, for instance, {@code "I"} would otherwise become a dotless i).
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} if the normalized text matches in full; {@code false} for {@code null}
     */
    public boolean containLanguageWord(String str) {
        if (str == null) {
            return false;
        }
        String normalized = str.trim().toLowerCase(java.util.Locale.ROOT);
        return matcher_ContainLanguageWord().reset(normalized).matches();
    }

    /**
     * Ratio of tokens in {@code str} whose first character is an upper-case letter
     * ({@code \p{Lu}}), as computed by {@link #wordRatio(String, Pattern)}.
     *
     * @param str text to analyse
     * @return the ratio returned by {@code wordRatio}
     */
    public double uppercaseWordRatio(String str) {
        Pattern startsWithUppercase = Pattern.compile("\\p{Lu}.*");
        return wordRatio(str, startsWithUppercase);
    }

    /**
     * Ratio of tokens in {@code str} whose first character is a letter that is not
     * upper-case ({@code \p{L}} minus {@code \p{Lu}}), as computed by
     * {@link #wordRatio(String, Pattern)}.
     *
     * @param str text to analyse
     * @return the ratio returned by {@code wordRatio}
     */
    public double lowercaseWordRatio(String str) {
        Pattern startsWithNonUppercaseLetter = Pattern.compile("[\\p{L}&&[^\\p{Lu}]].*");
        return wordRatio(str, startsWithNonUppercaseLetter);
    }

    /** Accessor for the {@code pattern_WordContainURL} field. */
    public Pattern pattern_WordContainURL() {
        return pattern_WordContainURL;
    }

    /** Accessor for the shared {@code matcher_WordContainURL} instance (not a copy). */
    public Matcher matcher_WordContainURL() {
        return matcher_WordContainURL;
    }

    /**
     * Tells whether the whole text matches the word-contains-URL pattern.
     *
     * <p>The text is trimmed and lower-cased before matching. Lower-casing uses
     * {@code Locale.ROOT} so the result does not depend on the JVM default locale.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} if the normalized text matches in full; {@code false} for {@code null}
     */
    public boolean containURLWord(String str) {
        if (str == null) {
            return false;
        }
        String normalized = str.trim().toLowerCase(java.util.Locale.ROOT);
        return matcher_WordContainURL().reset(normalized).matches();
    }

    /** Accessor for the {@code pattern_longestWord} field. */
    public Pattern pattern_longestWord() {
        return pattern_longestWord;
    }

    /** Accessor for the shared {@code matcher_longestWord} instance (not a copy). */
    public Matcher matcher_longestWord() {
        return matcher_longestWord;
    }

    /**
     * Finds the length of the longest match of the longest-word pattern in the trimmed text.
     *
     * @param str text to scan; may be {@code null}
     * @return the greatest {@code end() - start()} over all matches, 0 when nothing
     *         matches, or {@code null} when {@code str} is {@code null}
     */
    public Integer longestWord(String str) {
        if (str == null) {
            return null;
        }
        Matcher words = matcher_longestWord();
        words.reset(str.trim());
        int longest = 0;
        while (words.find()) {
            longest = Math.max(longest, words.end() - words.start());
        }
        return Predef$.MODULE$.int2Integer(longest);
    }

    /** Accessor for the {@code luisVonAhnWordlist} array; the internal array itself is returned, not a copy. */
    public String[] luisVonAhnWordlist() {
        return luisVonAhnWordlist;
    }

    /** Accessor for the {@code tokens} list; the internal list itself is returned, not a copy. */
    public List<String> tokens() {
        return tokens;
    }

    /** Accessor for the {@code patternString} field. */
    public String patternString() {
        return patternString;
    }

    /** Accessor for the {@code pattern_badWord} field. */
    public Pattern pattern_badWord() {
        return pattern_badWord;
    }

    /**
     * Ratio of tokens in the lower-cased text that are matched by the bad-word pattern.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so the result does not depend on the
     * JVM default locale.
     *
     * @param str text to analyse; may be {@code null}
     * @return result of {@link #wordRatio(String, Pattern)}, or 0.0 for {@code null}
     */
    public double badWordRatio(String str) {
        if (str == null) {
            return 0.0d;
        }
        return wordRatio(str.toLowerCase(java.util.Locale.ROOT), pattern_badWord());
    }

    /** Accessor for the {@code tokens_containbadword} list; the internal list itself is returned, not a copy. */
    public List<String> tokens_containbadword() {
        return tokens_containbadword;
    }

    /** Accessor for the {@code patternString_containBadword} field. */
    public String patternString_containBadword() {
        return patternString_containBadword;
    }

    /** Accessor for the {@code pattern_containBadword} field. */
    public Pattern pattern_containBadword() {
        return pattern_containBadword;
    }

    /** Accessor for the shared {@code matcher_ContainBadWord} instance (not a copy). */
    public Matcher matcher_ContainBadWord() {
        return matcher_ContainBadWord;
    }

    /**
     * Tells whether the whole text, taken as-is, matches the contain-bad-word pattern.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} on a full match; {@code false} otherwise or for {@code null}
     */
    public boolean containBadWord(String str) {
        return str != null && matcher_ContainBadWord().reset(str).matches();
    }

    /** Accessor for the {@code BanBuilderWordlist} array; the internal array itself is returned, not a copy. */
    public String[] BanBuilderWordlist() {
        return BanBuilderWordlist;
    }

    /** Accessor for the {@code tokens_banBuilder} list (a list copy of the BanBuilder word list). */
    public List<String> tokens_banBuilder() {
        return tokens_banBuilder;
    }

    /** Accessor for the {@code patternString_banBuilder} regex source string. */
    public String patternString_banBuilder() {
        return patternString_banBuilder;
    }

    /** Accessor for the compiled {@code pattern_banBuilder} pattern. */
    public Pattern pattern_banBuilder() {
        return pattern_banBuilder;
    }

    /** Accessor for the shared {@code matcher_BanBuilder} instance (not a copy). */
    public Matcher matcher_BanBuilder() {
        return matcher_BanBuilder;
    }

    /**
     * Tells whether the whole text, taken as-is, matches the BanBuilder word-list pattern.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} on a full match; {@code false} otherwise or for {@code null}
     */
    public boolean banBuilderWordListWord(String str) {
        return str != null && matcher_BanBuilder().reset(str).matches();
    }

    /** Accessor for the {@code tokens_ban} list (a list copy of the BanBuilder word list). */
    public List<String> tokens_ban() {
        return tokens_ban;
    }

    /** Accessor for the {@code patternString_ban} regex source string. */
    public String patternString_ban() {
        return patternString_ban;
    }

    /** Accessor for the compiled {@code pattern_banWord} pattern. */
    public Pattern pattern_banWord() {
        return pattern_banWord;
    }

    /**
     * Ratio of tokens in the lower-cased text that are matched by the ban-word pattern.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so the result does not depend on the
     * JVM default locale.
     *
     * @param str text to analyse; may be {@code null}
     * @return result of {@link #wordRatio(String, Pattern)}, or 0.0 for {@code null}
     */
    public double banWordRatio(String str) {
        if (str == null) {
            return 0.0d;
        }
        return wordRatio(str.toLowerCase(java.util.Locale.ROOT), pattern_banWord());
    }

    /** Accessor for the {@code regex_containLanguageWord} regex source string. */
    public String regex_containLanguageWord() {
        return regex_containLanguageWord;
    }

    /** Accessor for the compiled {@code pattern_forContainLanguageWord} pattern. */
    public Pattern pattern_forContainLanguageWord() {
        return pattern_forContainLanguageWord;
    }

    /** Accessor for the shared {@code matcher_containLanguageWord} instance (not a copy). */
    public Matcher matcher_containLanguageWord() {
        return matcher_containLanguageWord;
    }

    /**
     * Tells whether the whole text matches the pattern compiled from
     * {@code regex_containLanguageWord}.
     *
     * <p>The text is trimmed and lower-cased before matching. Lower-casing uses
     * {@code Locale.ROOT} so the result does not depend on the JVM default locale.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} if the normalized text matches in full; {@code false} for {@code null}
     */
    public boolean containLanguageBadWordWord(String str) {
        if (str == null) {
            return false;
        }
        String normalized = str.trim().toLowerCase(java.util.Locale.ROOT);
        return matcher_containLanguageWord().reset(normalized).matches();
    }

    /** Accessor for the {@code MaleNames} array; the internal array itself is returned, not a copy. */
    public String[] MaleNames() {
        return MaleNames;
    }

    /** Accessor for the {@code tokens_maleName} list (a list copy of {@code MaleNames}). */
    public List<String> tokens_maleName() {
        return tokens_maleName;
    }

    /** Accessor for the {@code patternString_MaleName} regex source string. */
    public String patternString_MaleName() {
        return patternString_MaleName;
    }

    /** Accessor for the compiled {@code pattern_MaleName} pattern. */
    public Pattern pattern_MaleName() {
        return pattern_MaleName;
    }

    /** Accessor for the shared {@code matcher_MaleName} instance (not a copy). */
    public Matcher matcher_MaleName() {
        return matcher_MaleName;
    }

    /**
     * Tells whether the whole text, taken as-is, matches the male-name pattern.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} on a full match; {@code false} otherwise or for {@code null}
     */
    public boolean maleNameWord(String str) {
        return str != null && matcher_MaleName().reset(str).matches();
    }

    /** Accessor for the {@code FemaleNames} array; the internal array itself is returned, not a copy. */
    public String[] FemaleNames() {
        return FemaleNames;
    }

    /** Accessor for the {@code tokens_FemaleName} list (a list copy of {@code FemaleNames}). */
    public List<String> tokens_FemaleName() {
        return tokens_FemaleName;
    }

    /** Accessor for the {@code patternString_FemaleName} regex source string. */
    public String patternString_FemaleName() {
        return patternString_FemaleName;
    }

    /** Accessor for the compiled {@code pattern_FeMaleName} pattern. */
    public Pattern pattern_FeMaleName() {
        return pattern_FeMaleName;
    }

    /** Accessor for the shared {@code matcher_FeMaleName} instance (not a copy). */
    public Matcher matcher_FeMaleName() {
        return matcher_FeMaleName;
    }

    /**
     * Tells whether the whole text, taken as-is, matches the female-name pattern.
     *
     * @param str text to test; may be {@code null}
     * @return {@code true} on a full match; {@code false} otherwise or for {@code null}
     */
    public boolean femaleNameWord(String str) {
        return str != null && matcher_FeMaleName().reset(str).matches();
    }

    /**
     * Counts the whitespace-separated tokens of {@code str} that occur as a substring
     * of {@code str2}.
     *
     * <p>{@code str} is trimmed and split on {@code \s+}; tokens that are empty after
     * trimming are skipped. The containment check is {@link String#contains}, so a
     * token counts even when it is only part of a longer word in {@code str2}, and a
     * token repeated in {@code str} is counted every time it appears.
     *
     * @param str  text whose tokens are looked up; {@code null} or empty yields 0
     * @param str2 text searched for each token; {@code null} yields 0
     * @return the number of matching tokens
     */
    public Integer currentPreviousCommentTialNumberSharingWords(String str, String str2) {
        int shared = 0;
        // Null guards replace the decompiled `"" != 0` comparison, which does not compile.
        if (str != null && str2 != null && !str.equals("")) {
            for (String token : Pattern.compile("\\s+").split(str.trim())) {
                if (!token.trim().equals("") && str2.contains(token)) {
                    shared++;
                }
            }
        }
        return Integer.valueOf(shared);
    }

    /** Accessor for the {@code StopWord} array; the internal array itself is returned, not a copy. */
    public String[] StopWord() {
        return StopWord;
    }

    /** Accessor for the {@code tokens_StopWords} list (a list copy of {@code StopWord}). */
    public List<String> tokens_StopWords() {
        return tokens_StopWords;
    }

    /** Accessor for the {@code patternString_stopword} regex source string. */
    public String patternString_stopword() {
        return patternString_stopword;
    }

    /** Accessor for the compiled {@code pattern_stopwords} pattern. */
    public Pattern pattern_stopwords() {
        return pattern_stopwords;
    }

    /** Accessor for the shared {@code matcher_stopword} instance (not a copy). */
    public Matcher matcher_stopword() {
        return matcher_stopword;
    }

    /**
     * Stop-word-aware variant of the shared-word count between {@code str} and {@code str2}.
     *
     * <p>{@code str} is trimmed and split on {@code \s+}; every token is handed, in
     * order, to the per-token helper
     * {@code $anonfun$currentPreviousCommentTialNumberSharingWordsWithoutStopWords$1},
     * which updates the running count.
     * NOTE(review): the helper presumably skips tokens matched by the stop-word
     * matcher - confirm against its body.
     *
     * @param str  text whose tokens are examined; {@code null} or empty yields 0
     * @param str2 text the tokens are compared against (passed to the helper unchanged)
     * @return the count accumulated by the helper
     */
    public Integer currentPreviousCommentTialNumberSharingWordsWithoutStopWords(String str, String str2) {
        IntRef sharedCount = IntRef.create(0);
        // Null/empty guard replaces the decompiled `"" != 0` comparison, which does not compile.
        if (str == null || str.equals("")) {
            return Predef$.MODULE$.int2Integer(sharedCount.elem);
        }
        ObjectRef previous = ObjectRef.create(str2);
        for (String token : Pattern.compile("\\s+").split(str.trim())) {
            $anonfun$currentPreviousCommentTialNumberSharingWordsWithoutStopWords$1(this, previous, sharedCount, token);
        }
        return Predef$.MODULE$.int2Integer(sharedCount.elem);
    }

    /**
     * Passes {@code str} to {@code Utils$.stringMatchValue} together with a regular expression
     * that matches {@code http://}, {@code https://} or {@code www.}.
     *
     * @param str text to scan
     * @return whatever {@code stringMatchValue} yields for that pattern (presumably a match
     *         count; the helper is defined outside this file, so confirm there)
     */
    public double getNumberOfLinks(String str) {
        final String linkRegex = "https?:\\/\\/|www\\.";
        return Utils$.MODULE$.stringMatchValue(str, linkRegex);
    }

    /**
     * Accessor for the {@code RegexStr} field.
     *
     * @return the string stored in the {@code RegexStr} field
     */
    public String RegexStr() {
        return RegexStr;
    }

    /**
     * Passes {@code str} to {@code Utils$.stringMatchValue} together with the regular expression
     * returned by {@link #RegexStr()}.
     *
     * @param str text to scan
     * @return whatever {@code stringMatchValue} yields for that pattern (presumably a match
     *         count; the helper is defined outside this file, so confirm there)
     */
    public double getNumberOfLanguageWord(String str) {
        final String languageRegex = RegexStr();
        return Utils$.MODULE$.stringMatchValue(str, languageRegex);
    }

    /**
     * Passes {@code str} to {@code Utils$.stringMatchValue} together with a regular expression
     * that matches the letter {@code Q} followed by one to eight digits.
     *
     * @param str text to scan
     * @return whatever {@code stringMatchValue} yields for that pattern (presumably a match
     *         count; the helper is defined outside this file, so confirm there)
     */
    public double getNumberOfQId(String str) {
        final String qIdRegex = "Q\\d{1,8}";
        return Utils$.MODULE$.stringMatchValue(str, qIdRegex);
    }

    /**
     * Evaluates {@code (d2 - d) / (d2 + 1)} in double precision and narrows the quotient to
     * {@code float}.
     *
     * @param d  value subtracted from {@code d2} in the numerator
     * @param d2 value used in both the numerator and, incremented by one, the denominator
     * @return the quotient as a {@code float}; follows IEEE-754 rules, so a zero denominator
     *         ({@code d2 == -1}) produces an infinity or NaN rather than an exception
     */
    public float proportion(double d, double d2) {
        double difference = d2 - d;
        double shiftedTotal = d2 + 1.0d;
        double ratio = difference / shiftedTotal;
        return (float) ratio;
    }

    /**
     * Counting step for a single token: increments {@code doubleRef.elem} by one when the trimmed
     * token is non-empty and is matched in full by {@code matcher}.
     *
     * @param matcher   matcher that is reset to the trimmed token and tested with {@code matches()}
     * @param doubleRef mutable counter cell updated in place
     * @param str       raw token; must not be {@code null}
     */
    public static final /* synthetic */ void $anonfun$wordRatio$1(Matcher matcher, DoubleRef doubleRef, String str) {
        String candidate = str.trim();
        // Short-circuit keeps the matcher untouched for empty tokens, as before.
        boolean accepted = !candidate.equals("") && matcher.reset(candidate).matches();
        if (accepted) {
            doubleRef.elem += 1.0d;
        }
    }

    /**
     * Counting step for a single token: increments {@code intRef.elem} when the token is not blank
     * and occurs as a substring of the string held in {@code objectRef.elem}.
     *
     * @param objectRef cell whose {@code elem} is cast to {@link String} and searched
     * @param intRef    mutable counter cell updated in place
     * @param str       raw token; must not be {@code null}
     */
    public static final /* synthetic */ void $anonfun$currentPreviousCommentTialNumberSharingWords$1(ObjectRef objectRef, IntRef intRef, String str) {
        // String.trim() never returns null, so the only case to filter is "empty after trimming".
        if (str.trim().isEmpty()) {
            return;
        }
        // The untrimmed token is what gets searched for, matching the established behaviour.
        if (((String) objectRef.elem).contains(str)) {
            intRef.elem++;
        }
    }

    /**
     * Counting step for a single token: increments {@code intRef.elem} when the token is non-null,
     * not blank, not matched in full by {@code word.matcher_stopword()}, and occurs as a substring
     * of the string held in {@code objectRef.elem}.
     *
     * @param word      instance supplying the matcher via {@code matcher_stopword()}
     * @param objectRef cell whose {@code elem} is cast to {@link String} and searched
     * @param intRef    mutable counter cell updated in place
     * @param str       raw token; {@code null} is ignored
     */
    public static final /* synthetic */ void $anonfun$currentPreviousCommentTialNumberSharingWordsWithoutStopWords$1(Word word, ObjectRef objectRef, IntRef intRef, String str) {
        if (str == null) {
            return;
        }
        // Evaluated before the blank check so the matcher is reset for every non-null token,
        // keeping the original order of side effects.
        boolean matchedByStopWordMatcher = word.matcher_stopword().reset(str).matches();
        // String.trim() never returns null, so the only case to filter is "empty after trimming".
        if (str.trim().isEmpty()) {
            return;
        }
        if (!matchedByStopWordMatcher && ((String) objectRef.elem).contains(str)) {
            intRef.elem++;
        }
    }
}
