array(), 'scores' => array() );

    /**
     * Diagnostics written by the most recent execute() call.
     *
     * It is reset from self::$info_tpl at the start of every call and then
     * filled with two keys:
     *  - 'matches': word => array('count', 'score', 'group', 'regex')
     *  - 'scores':  category => accumulated score
     */
    public static $info;

    /** Keyword rows (Category, Word, Weight, ...) cached from Message66. */
    private static $KEYWORDS = array();

    /** Rows of Classificator_deal_cats with Value < 1, cached by loadKeywords(). */
    private static $CATS = array();

    /**
     * array_map callback: converts one token into a single-entry count map.
     *
     * The length test uses strlen() (bytes) on the raw token, before the
     * clean-up regex runs, so the resulting key can be shorter than three
     * characters or even empty.
     *
     * @param string $word Raw token produced by the splitter in execute().
     * @return array|null array(cleanedWord => 1), or null for tokens of two
     *                    bytes or fewer.
     */
    public static function lcount( $word )
    {
        if (strlen($word) <= 2) {
            return null;
        }

        // Keep only word characters, digits and hyphens.
        return array(preg_replace('/[^\w\-\d]/', '', $word) => 1);
    }

    /**
     * array_reduce callback: merges a count map into the running totals.
     *
     * @param array|null $w1 Totals so far; anything that is not an array is
     *                       treated as an empty map.
     * @param array|null $w2 Map returned by lcount(); null or empty is a no-op.
     * @return array Updated word => count totals.
     */
    public static function rcount($w1, $w2)
    {
        if (!is_array($w1)) {
            $w1 = array();
        }

        if (is_array($w2)) {
            foreach ($w2 as $word => $count) {
                // Initialise unseen words explicitly instead of relying on
                // "+=" against an undefined key (a warning on PHP 8).
                $w1[$word] = (isset($w1[$word]) ? $w1[$word] : 0) + $count;
            }
        }

        return $w1;
    }

    /**
     * Returns the first truthy 'Value' found in a list of rows.
     *
     * @param array $ar List of rows, each carrying a 'Value' key. It is taken
     *                  by reference for backward compatibility only and is
     *                  never modified.
     * @return mixed The first truthy 'Value', or false when there is none.
     */
    public static function catParent(&$ar)
    {
        foreach ($ar as $a) {
            if ($a['Value']) {
                return $a['Value'];
            }
        }

        return false;
    }

    /**
     * Runs a SELECT and returns every row as an associative array.
     *
     * NOTE(review): this relies on the legacy mysql_* extension and its
     * implicit connection; that extension was removed in PHP 7.
     *
     * @param string $sql Trusted SQL text (never pass user input).
     * @return array List of rows; empty when the query fails or matches nothing.
     */
    private static function fetchRows($sql)
    {
        $rows = array();

        $result = mysql_query($sql);
        if (!$result) {
            // A failed query yields no rows; do not hand "false" to
            // mysql_fetch_assoc(), which would only raise a warning.
            return $rows;
        }

        while ($row = mysql_fetch_assoc($result)) {
            $rows[] = $row;
        }

        return $rows;
    }

    /**
     * (Re)loads the keyword and category caches from the database.
     *
     * It fills self::$KEYWORDS from Message66 (ordered by Category) and
     * self::$CATS from Classificator_deal_cats (rows with Value < 1).
     *
     * @return array The keyword rows that were just loaded.
     */
    public static function loadKeywords()
    {
        self::$KEYWORDS = self::fetchRows("select * from Message66 order by Category ASC");
        self::$CATS = self::fetchRows("SELECT * FROM `Classificator_deal_cats` where Value < 1 ");

        return self::$KEYWORDS;
    }

    /**
     * Scores a text against the keyword table and returns the best category.
     *
     * The text is lower-cased, stripped of tags and digits, and split into
     * words; double-quoted phrases are kept as single tokens. Each distinct
     * word is matched against every keyword regex (the 'Word' column,
     * anchored and case-insensitive). A word contributes
     * Weight * $k * occurrences to a category at most once. Details are left
     * in self::$info; when one word matches several categories, its
     * 'matches' entry holds the last one.
     *
     * Keywords are loaded lazily: the table is read only when the cache is
     * empty. Call loadKeywords() to force a refresh.
     *
     * @param string    $text Text to classify (may contain HTML).
     * @param int|float $k    Multiplier applied to every keyword weight.
     * @return mixed The 'Category' value with the highest positive score, or
     *               0 when nothing matched.
     */
    public static function execute( $text, $k = 1 )
    {
        self::$info = self::$info_tpl;

        if (!self::$KEYWORDS) {
            self::loadKeywords();
        }

        $text = preg_replace('/\d+/', '', strip_tags(strtolower($text)));

        // Either a "quoted phrase" (captured, with surrounding separators
        // swallowed) or a run of separator characters.
        $splitters = '\s.,\(\)?:;';
        $sep = '[' . $splitters . ']';
        $tokens = preg_split(
            '/' . $sep . '*\\"([^\\"]+)\\"' . $sep . '*|' . $sep . '+/',
            $text,
            0,
            PREG_SPLIT_DELIM_CAPTURE
        );

        // The text is already lower-cased above, so only trimming is needed.
        $tokens = array_map('trim', $tokens);
        $counted = array_map(array('Categorizer', 'lcount'), $tokens);
        $result = array_reduce($counted, array('Categorizer', 'rcount'), array());
        arsort($result);

        $scores = array();
        $loaded = array(); // [category][word] => true once the word was scored

        foreach ($result as $word => $n) {
            $word = trim($word);
            // Loop-invariant: the word re-encoded for UTF-8 keyword patterns.
            $wordUtf8 = iconv('cp1251', 'utf-8', $word);

            foreach (self::$KEYWORDS as $kw) {
                $name = $kw['Category'];
                if (isset($loaded[$name][$word])) {
                    continue;
                }

                $regexp = $kw['Word'];
                $weight = $kw['Weight'];
                $pattern = '/^' . $regexp . '$/i';

                // The text and keyword encodings are not known here, so try
                // UTF-8 pattern vs. converted word, cp1251 pattern vs. raw
                // word, and finally the raw pair.
                $matched = preg_match($pattern, $wordUtf8)
                    || preg_match('/^' . iconv('utf-8', 'cp1251', $regexp) . '$/i', $word)
                    || preg_match($pattern, $word);

                if (!$matched) {
                    continue;
                }

                $loaded[$name][$word] = true;
                $scores[$name] = (isset($scores[$name]) ? $scores[$name] : 0) + $weight * $k * $n;

                self::$info['matches'][$word] = array(
                    'count' => $n,
                    'score' => $scores[$name],
                    'group' => $name,
                    'regex' => iconv('UTF-8', 'cp1251', $regexp),
                );
            }
        }

        // Pick the category with the highest strictly positive score.
        $bestScore = 0;
        $bestCategory = 0;
        foreach ($scores as $category => $score) {
            if ($score > $bestScore) {
                $bestScore = $score;
                $bestCategory = $category;
            }
        }

        self::$info['scores'] = $scores;

        return $bestCategory;
    }

    /**
     * Classifies up to 500 uncategorised rows of Message65 and stores the result.
     *
     * It raises the memory and time limits, refreshes the keyword cache,
     * runs execute() on "Description Name" of every row with Category = 0,
     * writes the winning category back, and echoes the elapsed time.
     * get_sec() is a helper defined outside this class.
     *
     * @return void
     */
    public static function updateAll()
    {
        $start_time = get_sec();

        ini_set('memory_limit', '240M');
        set_time_limit(500);

        // Load once up front; execute() reuses the cache for every deal.
        self::loadKeywords();

        $deals = self::fetchRows("select * from Message65 where Category = 0 order by Category ASC LIMIT 500");

        foreach ($deals as $deal) {
            $cat_id = self::execute($deal['Description'] . ' ' . $deal['Name']);

            // Both values are interpolated unquoted as numeric ids; cast them
            // so nothing but an integer can ever reach the statement.
            mysql_query(
                "update Message65 set Category=" . (int) $cat_id
                . " Where Message_ID=" . (int) $deal['Message_ID']
            );
        }

        $exec_time = get_sec() - $start_time;
        echo "exec time : $exec_time sec.";
    }

    /**
     * Serialises the diagnostics of the last execute() call.
     *
     * @return string serialize()d copy of self::$info.
     */
    public static function dump()
    {
        return serialize(self::$info);
    }
}