返回信息流
本人对 PHP 完全无力,Java 也只是三脚猫功夫。
代码如下,所需的两个 txt 文件我都有。
<?php
/**
 * Frequency-based spelling corrector.
 *
 * A word => occurrence-count dictionary is built from "big.txt" (or loaded
 * from the cache file "serialized_dictionary.txt" when it exists). A word is
 * corrected to the most frequent dictionary word found at edit distance 0,
 * then 1, then 2; if nothing matches, the input word is returned unchanged.
 *
 * Both file names are resolved relative to the current working directory.
 */
class Corrector{
    /** Word => occurrence count map; stays null until correct() first loads it. */
    private static $NWORDS;

    /**
     * Split a text into lowercase words made only of the letters a-z.
     *
     * @param string $text Raw text.
     * @return array List of lowercase words in order of appearance.
     */
    static function words($text){
        preg_match_all("/[a-z]+/", strtolower($text), $output);
        return $output[0];
    }

    /**
     * Count how many times each word occurs.
     *
     * @param array $text List of words (as produced by words()).
     * @return array Map word => occurrence count; empty array for empty input.
     */
    static function train($text){
        // Start from an empty map so empty input yields array(), not null.
        $model = array();
        foreach($text as $word){
            if(isset($model[$word]))
                $model[$word] += 1;
            else
                $model[$word] = 1;
        }
        return $model;
    }

    /**
     * Load the dictionary from the cache file, or build it from "big.txt"
     * and write the cache file.
     *
     * @return array Map word => occurrence count. Empty array when neither a
     *               usable cache nor a readable "big.txt" is available.
     */
    static function read(){
        $cacheFile = 'serialized_dictionary.txt';
        $corpusFile = 'big.txt';

        if(file_exists($cacheFile)){
            $raw = file_get_contents($cacheFile);
            if($raw !== false){
                // NOTE(review): unserialize() must only ever see data this
                // class wrote itself; never point the cache at untrusted input.
                $cached = unserialize($raw);
                if(is_array($cached))
                    return $cached;
            }
            // Unreadable or corrupt cache: fall through and rebuild it.
        }

        if(!is_readable($corpusFile))
            return array(); // nothing to train on; do not cache an empty dictionary

        $text = file_get_contents($corpusFile);
        if($text === false)
            return array();

        // train() and words() are static methods, so they need the self:: prefix.
        $NWORDS = self::train(self::words($text));
        file_put_contents($cacheFile, serialize($NWORDS));
        return $NWORDS;
    }

    /**
     * Generate every string at edit distance 1 from $word: deletions,
     * adjacent transpositions, single-letter replacements and insertions.
     * The result may contain duplicates.
     *
     * @param string $word Lowercase word.
     * @return array List of candidate strings.
     */
    static function edits1($word){
        $alphabet = str_split('abcdefghijklmnopqrstuvwxyz');
        $n = strlen($word);
        $edits = array();
        for($i = 0; $i < $n; $i++){
            $head = substr($word, 0, $i);
            $edits[] = $head . substr($word, $i + 1); //delete
            if($i < $n - 1)
                $edits[] = $head . $word[$i + 1] . $word[$i] . substr($word, $i + 2); //transposes
            foreach($alphabet as $c){
                $edits[] = $head . $c . substr($word, $i + 1); //replaces
                $edits[] = $head . $c . substr($word, $i); //insert
            }
        }
        foreach($alphabet as $c)
            $edits[] = $word . $c; //insert after the last character
        return $edits;
    }

    /**
     * Keep only the words that are present in the dictionary.
     *
     * @param array $words Candidate words.
     * @return array The candidates that are dictionary keys (order preserved).
     */
    static function known($words){
        $known = array();
        foreach($words as $w)
            if(isset(self :: $NWORDS[$w]))
                $known[] = $w;
        return $known;
    }

    /**
     * Collect dictionary words at edit distance 2 from $word by applying
     * edits1() twice. The result may contain duplicates.
     *
     * @param string $word Lowercase word.
     * @return array Dictionary words reachable with two edits.
     */
    static function known_edits2($word){
        $known = array();
        foreach(self :: edits1($word) as $e1)
            foreach(self :: edits1($e1) as $e2)
                if(isset(self :: $NWORDS[$e2]))
                    $known[] = $e2;
        return $known;
    }

    /**
     * Pick the candidate with the highest dictionary count.
     *
     * @param array $words Candidate words.
     * @return string The most frequent candidate (first one wins on ties),
     *                or "" when no candidate has a positive count.
     */
    static function getmax($words){
        $max = 0;
        $ret = "";
        foreach($words as $w){
            // Words missing from the dictionary count as 0 instead of raising a warning.
            $temp = isset(self :: $NWORDS[$w]) ? self :: $NWORDS[$w] : 0;
            if($temp > $max){
                $max = $temp;
                $ret = $w;
            }
        }
        return $ret;
    }

    /**
     * Return the most likely spelling of $word.
     *
     * The word is trimmed and lowercased first. The dictionary is loaded via
     * read() whenever the in-memory copy is empty.
     *
     * @param string $word Word to correct.
     * @return string|null The corrected (or unchanged) word; null when the
     *                     trimmed input is empty in the sense of empty(),
     *                     which also covers the string "0".
     */
    static function correct($word){
        if(empty(self :: $NWORDS))
            self :: $NWORDS = self :: read();
        $word = strtolower(trim($word));
        if(empty($word))
            return;
        if(self :: known(array($word)))
            return $word;
        if(($temp = self :: known(self :: edits1($word))))
            return self :: getmax($temp);
        // Distance-2 search is the expensive step, so it runs only as a last resort.
        if(($temp = self :: known_edits2($word)))
            return self :: getmax($temp);
        return $word;
    }
}
?>
这是一条镜像帖。来源:北邮人论坛 / java / #25430,同步于 2013/5/18
该镜像源已超过 30 天没有更新,可能在源站已被删除。
Java机器人发帖
有大神帮忙把这段php代码写成java嘛?毕设要用到。。。
excellentwu
2013/5/18 镜像同步 · 3 回复
订阅后,新回复会通过你的通知中心匿名送达。
3 条回复
我毕设使用java写的,就剩这个拼写纠正的模块了。。。php完全不会。。
【 在 tootwo 的大作中提到: 】
: php 多好,为嘛非要改Java