Java使用DFA算法實現過濾多家公司自定義敏感字功能詳解

本文實例講述了Java使用DFA算法實現過濾多家公司自定義敏感字的功能。分享給大家供大家參考，具體如下：

背景

因為最近通訊方面有個需求，需要讓多家客戶公司可以各自定義敏感詞，並按他們自定義的規則進行過濾，所以選擇了DFA算法來做，不過和以前傳統的DFA寫法不太一樣。

模式圖

Java使用DFA算法實現過濾多家公司自定義敏感字功能詳解

直接上代碼

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

									/**
									 * Per-company sensitive-word filter backed by one shared trie (DFA) of nested maps.
									 *
									 * <p>Layout of every node (including the root {@link #currentmap}):
									 * <ul>
									 *   <li>a {@code Character} key maps to the child node (a {@code HashMap}) reached by
									 *       that character;</li>
									 *   <li>a {@code "companyid" + id} key maps to {@code "0"} when the path to this node
									 *       is only a prefix of one of that company's keywords, or {@code "1"} when the
									 *       path spells a complete keyword of that company.</li>
									 * </ul>
									 * All companies share the same character paths; only the markers are per company.
									 *
									 * <p>Matching is leftmost-longest: at each text position the longest keyword of the
									 * requested company that starts there wins, and scanning resumes right after it.
									 *
									 * <p>This class performs no locking of its own. The root is a
									 * {@code ConcurrentHashMap} but child nodes are plain {@code HashMap}s, so callers
									 * that load keywords while other threads are filtering must synchronize externally.
									 *
									 * @author 姚旭民
									 */
									public class keywordfilter {

									  /**
									   * Root node of the shared trie. The declared type is kept for compatibility; at
									   * runtime it is keyed by {@code Character} (see the class comment), which is why
									   * it is only accessed through raw {@code Map} references inside this class.
									   */
									  @SuppressWarnings("rawtypes")
									  public static Map<String, HashMap> currentmap = new ConcurrentHashMap<String, HashMap>();

									  /**
									   * @deprecated formerly used as shared scratch state during traversal, which made
									   *     concurrent calls interfere with each other. No longer read or written;
									   *     retained only so existing references still compile.
									   */
									  @Deprecated
									  @SuppressWarnings("rawtypes")
									  public static Map nowhash = null;

									  /**
									   * @deprecated formerly the shared "current child node" scratch variable. No longer
									   *     read or written; retained only so existing references still compile.
									   */
									  @Deprecated
									  public static Object wordmap;

									  /** Marker value: the path to this node is a proper prefix of a company keyword. */
									  private static final String PREFIX_MARK = "0";

									  /** Marker value: the path to this node spells a complete company keyword. */
									  private static final String WORD_END_MARK = "1";

									  // Static utility class: not instantiable.
									  private keywordfilter() {
									  }

									  /** Builds the per-company marker key stored inside trie nodes. */
									  private static String getkey(int companyid) {
									    return "companyid" + companyid;
									  }

									  /** Removes every keyword of every company by emptying the root node. */
									  public static void clear() {
									    currentmap.clear();
									  }

									  /**
									   * Adds keywords for one company to the shared trie.
									   *
									   * <p>Each keyword is trimmed first. A {@code null} list, {@code null} entries and
									   * entries that are empty after trimming are ignored. Existing markers are never
									   * downgraded, so adding {@code "abc"} after {@code "ab"} keeps {@code "ab"} as a
									   * complete keyword.
									   *
									   * @param companyid company the keywords belong to
									   * @param keywords keywords to register; may be {@code null}
									   */
									  @SuppressWarnings({"rawtypes", "unchecked"})
									  public static void savekeywords(int companyid, List<String> keywords) {
									    if (keywords == null) {
									      return;
									    }
									    String key = getkey(companyid);
									    for (int i = 0; i < keywords.size(); i++) {
									      String raw = keywords.get(i);
									      if (raw == null) {
									        continue;
									      }
									      String keyword = raw.trim();
									      if (keyword.length() == 0) {
									        continue;
									      }
									      Map node = currentmap;
									      for (int j = 0; j < keyword.length(); j++) {
									        char word = keyword.charAt(j);
									        Map child = childof(node, word);
									        if (child == null) {
									          child = new HashMap();
									          node.put(Character.valueOf(word), child);
									        }
									        // Mark the CHILD as a prefix for this company, without overwriting an
									        // existing "1" left by a shorter keyword of the same company.
									        if (!child.containsKey(key)) {
									          child.put(key, PREFIX_MARK);
									        }
									        node = child;
									      }
									      // node is now the node of the keyword's last character.
									      node.put(key, WORD_END_MARK);
									    }
									  }

									  /**
									   * Masks every keyword of the given company found in {@code txt}.
									   *
									   * <p>Each matched keyword (leftmost-longest) is replaced by a single {@code "*"}.
									   * Keywords are treated as literal text, never as regular expressions.
									   *
									   * @param companyid company whose keywords apply
									   * @param txt text to filter; {@code null} is treated as the empty string
									   * @return a two-element list: index 0 is the masked text, index 1 is the distinct
									   *     matched keywords in order of first appearance joined by {@code ","}
									   *     (empty string when nothing matched)
									   */
									  public static List<String> repword(int companyid, String txt) {
									    String text = (txt == null) ? "" : txt;
									    StringBuilder masked = new StringBuilder(text.length());
									    List<String> found = scan(text, getkey(companyid), masked);

									    List<String> result = new ArrayList<String>(2);
									    result.add(masked.toString());
									    result.add(joinwithcomma(found));
									    return result;
									  }

									  /**
									   * Returns the length of the longest keyword of the given company that starts at
									   * {@code begin}, or 0 when none does.
									   *
									   * <p>Deliberately private; it is an implementation detail of the public queries.
									   *
									   * @param txt text being inspected (non-null)
									   * @param companyid company whose keywords apply
									   * @param begin index in {@code txt} at which a keyword must start
									   * @return number of characters matched, or 0
									   */
									  private static int checkkeywords(String txt, int companyid, int begin) {
									    return matchlength(txt, getkey(companyid), begin);
									  }

									  /**
									   * Lists the keywords of the given company that occur in {@code txt}.
									   *
									   * <p>The text is scanned without being modified, using the same leftmost-longest
									   * rule as {@link #repword(int, String)}.
									   *
									   * @param txt text to inspect; may be {@code null}
									   * @param companyid company whose keywords apply
									   * @return distinct matched keywords in order of first appearance joined by
									   *     {@code ","}, or {@code null} when nothing matched or {@code txt} is
									   *     {@code null}
									   */
									  public static String gettxtkeywords(String txt, int companyid) {
									    if (txt == null) {
									      return null;
									    }
									    List<String> found = scan(txt, getkey(companyid), null);
									    return found.isEmpty() ? null : joinwithcomma(found);
									  }

									  /**
									   * Tells whether {@code txt} contains at least one keyword of the given company.
									   *
									   * <p>Static because the class cannot be instantiated (private constructor).
									   *
									   * @param txt text to inspect; may be {@code null}
									   * @param companyid company whose keywords apply
									   * @return {@code true} if any keyword of the company occurs in {@code txt}
									   */
									  public static boolean iskeywords(String txt, int companyid) {
									    if (txt == null) {
									      return false;
									    }
									    for (int i = 0; i < txt.length(); i++) {
									      if (checkkeywords(txt, companyid, i) > 0) {
									        return true;
									      }
									    }
									    return false;
									  }

									  /**
									   * Walks {@code txt} once, collecting distinct leftmost-longest matches and, when
									   * {@code masked} is non-null, appending the masked text to it.
									   */
									  private static List<String> scan(String txt, String key, StringBuilder masked) {
									    List<String> found = new ArrayList<String>();
									    int i = 0;
									    while (i < txt.length()) {
									      int len = matchlength(txt, key, i);
									      if (len > 0) {
									        String keyword = txt.substring(i, i + len);
									        if (!found.contains(keyword)) {
									          found.add(keyword);
									        }
									        if (masked != null) {
									          masked.append('*');
									        }
									        i += len;
									      } else {
									        if (masked != null) {
									          masked.append(txt.charAt(i));
									        }
									        i++;
									      }
									    }
									    return found;
									  }

									  /**
									   * Follows the trie from the root along {@code txt} starting at {@code begin} and
									   * remembers the last position whose node carries the company's end-of-word marker.
									   */
									  @SuppressWarnings("rawtypes")
									  private static int matchlength(String txt, String key, int begin) {
									    Map node = currentmap;
									    int matched = 0;
									    for (int i = begin; i < txt.length(); i++) {
									      node = childof(node, txt.charAt(i));
									      if (node == null) {
									        break;
									      }
									      Object marker = node.get(key);
									      if (marker != null && WORD_END_MARK.equals(marker.toString())) {
									        // Keep going: a longer keyword sharing this prefix may still match.
									        matched = i - begin + 1;
									      }
									    }
									    return matched;
									  }

									  /** Returns the child node reached from {@code node} by {@code word}, or null. */
									  @SuppressWarnings("rawtypes")
									  private static Map childof(Map node, char word) {
									    Object child = node.get(Character.valueOf(word));
									    return (child instanceof Map) ? (Map) child : null;
									  }

									  /** Joins the given strings with "," and no trailing separator. */
									  private static String joinwithcomma(List<String> parts) {
									    StringBuilder joined = new StringBuilder();
									    for (int i = 0; i < parts.size(); i++) {
									      if (i > 0) {
									        joined.append(',');
									      }
									      joined.append(parts.get(i));
									    }
									    return joined.toString();
									  }

									  /** Small demonstration: registers a few keywords for company 1 and filters a text. */
									  public static void main(String[] arg) {
									    List<String> keywords = new ArrayList<String>();
									    keywords.add("傻×");
									    keywords.add("漢奸");
									    keywords.add("草");
									    keywords.add("草泥馬");
									    keywordfilter.savekeywords(1, keywords);
									    String txt = "是傻×漢奸傻a傻b傻c傻d漢奸傻×草泥馬";
									    List<String> list = repword(1, txt);
									    System.out.println("文中包含的敏感字為:" + list.get(1));
									    System.out.println("原文:" + txt);
									    System.out.println("敏感字過濾后:" + list.get(0));
									  }
									}