<?
/**
 * Converts a locale-formatted number into a plain "1234.56" style string.
 *
 * Currency symbols and grouping (thousands) separators reported by
 * localeconv() are removed, then the locale's decimal point(s) are replaced
 * by ".".
 *
 * The two steps must run in this order: in locales where the thousands
 * separator is "." (e.g. de_DE), converting the decimal point first would
 * turn "1.234,56" into "1.234.56" and the subsequent separator removal would
 * then delete the freshly written decimal point as well.
 *
 * @param mixed $value Value to normalise; anything str_replace() accepts as a subject.
 * @return mixed The normalised string (or array of strings when an array was given).
 */
function number2db($value)
{
    $locale = localeconv();

    // Empty entries (common in the "C" locale) are skipped by str_replace().
    // Currency symbols go first so that a symbol containing a separator
    // character is still removed as a whole.
    $noise = array(
        $locale['int_curr_symbol'],
        $locale['currency_symbol'],
        $locale['thousands_sep'],
        $locale['mon_thousands_sep'],
    );
    $stripped = str_replace($noise, '', $value);

    $decimalPoints = array(
        $locale['decimal_point'],
        $locale['mon_decimal_point'],
    );

    return str_replace($decimalPoints, '.', $stripped);
}
/**
 * Returns the current Unix timestamp with microsecond resolution.
 *
 * Uses microtime(true), which yields the same seconds-plus-fraction float
 * that was previously assembled by hand from the "msec sec" string form.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function get_sec()
{
    return microtime(true);
}
/**
 * Keyword-based text categorizer.
 *
 * Keyword rows (columns Category, Word, Weight) are read from table
 * Message66. A text is split into words, every word is matched against each
 * keyword's regular expression, and the category with the highest
 * accumulated score wins. Details of the last run are kept in self::$info.
 *
 * NOTE(review): all database access goes through the legacy mysql_*
 * extension and relies on its implicit connection. That extension was
 * removed in PHP 7; migrating to mysqli/PDO needs a connection handle that
 * is not visible in this file, so the calls are left as they are.
 */
class Categorizer {
    /** Pristine shape of self::$info; copied at the start of every execute() call. */
    public static $info_tpl = array(
        'matches' => array(),
        'scores' => array()
    );

    /** Details of the most recent execute() run: 'matches' per word and 'scores' per category. */
    public static $info;

    /** Keyword rows as loaded by loadKeywords(). */
    private static $KEYWORDS = array();

    /** Category rows as loaded by loadKeywords(); not read anywhere in this class. */
    private static $CATS = array();

    /**
     * Maps a single token to a one-entry word-count array.
     *
     * @param string $word Raw token.
     * @return array|null array(cleanedWord => 1), or null when the token is
     *                    2 bytes or shorter, or nothing is left after cleaning.
     */
    public static function lcount( $word ) {
        if (strlen($word) <= 2) {
            return null;
        }

        // Keep only word characters, digits and hyphens.
        $clean = preg_replace('/[^\w\-\d]/', '', $word);

        // A token made only of stripped characters is not a word; without
        // this guard it would be counted under the empty-string key.
        if ($clean === null || $clean === '') {
            return null;
        }

        return array($clean => 1);
    }

    /**
     * Reducer callback: adds the counts of $w2 onto the totals in $w1.
     *
     * @param array|null $w1 Totals so far (anything that is not an array is treated as empty).
     * @param array|null $w2 Word => count pairs to add; null/empty values are ignored.
     * @return array Updated totals.
     */
    public static function rcount($w1, $w2) {
        $totals = is_array($w1) ? $w1 : array();

        if (is_array($w2)) {
            foreach ($w2 as $word => $count) {
                // Start from 0 explicitly instead of "+=" on an undefined key.
                $totals[$word] = (isset($totals[$word]) ? $totals[$word] : 0) + $count;
            }
        }

        return $totals;
    }

    /**
     * Returns the first truthy 'Value' entry found in a list of rows.
     *
     * @param array $ar List of associative rows.
     * @return mixed The first truthy 'Value', or false when there is none.
     */
    public static function catParent(&$ar) {
        foreach ($ar as $row) {
            if (!empty($row['Value'])) {
                return $row['Value'];
            }
        }

        return false;
    }

    /**
     * Runs a query through the legacy mysql extension and collects all rows.
     *
     * @param string $sql Query text.
     * @return array List of associative rows; empty when the query failed.
     */
    private static function fetchRows($sql) {
        $rows = array();
        $result = mysql_query($sql);

        if ($result === false) {
            // Report the failure once instead of letting mysql_fetch_assoc()
            // complain about a boolean argument.
            trigger_error('Categorizer query failed: ' . mysql_error(), E_USER_WARNING);
            return $rows;
        }

        while ($row = mysql_fetch_assoc($result)) {
            $rows[] = $row;
        }

        return $rows;
    }

    /**
     * Loads the keyword rows (table Message66) and the category rows
     * (table Classificator_deal_cats, Value < 1) into the static caches.
     *
     * @return array The keyword rows.
     */
    public static function loadKeywords(){
        self::$KEYWORDS = self::fetchRows("select * from Message66 order by Category ASC");
        self::$CATS = self::fetchRows("SELECT * FROM `Classificator_deal_cats` where Value < 1 ");

        return self::$KEYWORDS;
    }

    /**
     * Scores a text against the keywords and returns the winning category.
     *
     * The text is lower-cased, stripped of tags and digits, and split into
     * words. Each distinct word is tested against every keyword regexp in
     * three ways, because word and keyword may be in different encodings
     * (cp1251 vs UTF-8): the word converted to UTF-8, the keyword converted
     * to cp1251, and both unconverted. A word adds
     * Weight * $k * occurrences to a category at most once.
     *
     * self::$info is reset and then filled with the per-category 'scores'
     * and the per-word 'matches' of this run.
     *
     * @param string $text   Text to categorize.
     * @param int    $k      Multiplier applied to every score contribution.
     * @param bool   $reload Reload the keywords from the database first
     *                       (default, the historical behaviour). Pass false
     *                       when loadKeywords() has already been called.
     * @return mixed Category with the highest positive score, or 0 if nothing scored.
     */
    public static function execute( $text, $k = 1, $reload = true ){
        self::$info = self::$info_tpl;

        if ($reload) {
            self::loadKeywords();
        }

        $text = preg_replace('/\d+/', '', strip_tags(strtolower($text)));

        // Split on punctuation/whitespace; a double-quoted phrase is kept as
        // one token through the capture group.
        $splitters = '\s.,\(\)?:;';
        $tokens = preg_split(
            '/[' . $splitters . ']*\"([^\"]+)\"[' . $splitters . ']*|[' . $splitters . ']+/',
            $text,
            0,
            PREG_SPLIT_DELIM_CAPTURE
        );
        if ($tokens === false) {
            $tokens = array();
        }

        // The text is already lower-cased, so the tokens only need trimming.
        $tokens = array_map('trim', $tokens);
        $counts = array_map(array('Categorizer', 'lcount'), $tokens);
        $result = array_reduce($counts, array('Categorizer', 'rcount'), array());
        arsort($result);

        // Build the patterns once per keyword instead of once per
        // word/keyword pair.
        $rules = array();
        foreach (self::$KEYWORDS as $kw) {
            $regexp = $kw['Word'];
            $regexpCp1251 = iconv('utf-8', 'cp1251', $regexp);

            $rules[] = array(
                'name'    => $kw['Category'],
                'weight'  => $kw['Weight'],
                'pattern' => '/^' . $regexp . '$/i',
                // iconv() returns false when the keyword cannot be converted;
                // that variant is then skipped.
                'patternCp1251' => $regexpCp1251 === false ? null : '/^' . $regexpCp1251 . '$/i',
                'regex'   => $regexpCp1251
            );
        }

        $scores = array();
        $loaded = array();

        foreach ($result as $word => $n) {
            $word = trim($word);
            $wordUtf8 = iconv('cp1251', 'utf-8', $word);

            foreach ($rules as $rule) {
                $name = $rule['name'];

                // Each word contributes to a category only once, even when
                // several keywords of that category match it.
                if (isset($loaded[$word][$name])) {
                    continue;
                }

                $matched =
                    ($wordUtf8 !== false && preg_match($rule['pattern'], $wordUtf8))
                    || ($rule['patternCp1251'] !== null && preg_match($rule['patternCp1251'], $word))
                    || preg_match($rule['pattern'], $word);

                if (!$matched) {
                    continue;
                }

                $loaded[$word][$name] = true;
                $scores[$name] = (isset($scores[$name]) ? $scores[$name] : 0) + $rule['weight'] * $k * $n;

                self::$info['matches'][$word] = array(
                    'count' => $n,
                    'score' => $scores[$name],
                    'group' => $name,
                    'regex' => $rule['regex']
                );
            }
        }

        // Pick the category with the highest strictly positive score; on a
        // tie the category that scored first wins.
        $bestScore = 0;
        $bestCategory = 0;
        foreach ($scores as $category => $score) {
            if ($score > $bestScore) {
                $bestScore = $score;
                $bestCategory = $category;
            }
        }

        self::$info['scores'] = $scores;

        return $bestCategory;
    }

    /**
     * Categorizes up to 500 rows of table Message65 whose Category is 0 and
     * writes the detected category back; prints the elapsed time.
     *
     * Raises the memory limit to 240M and the time limit to 500 seconds.
     *
     * @return void
     */
    public static function updateAll(){
        $start_time = get_sec();

        ini_set('memory_limit', '240M');
        set_time_limit(500);

        // Load the keywords once here; execute() is told not to reload them
        // for every single deal.
        self::loadKeywords();

        $deals = self::fetchRows("select * from Message65 where Category = 0 order by Category ASC LIMIT 500");

        foreach ($deals as $deal) {
            $cat_id = self::execute($deal['Description'] . ' ' . $deal['Name'], 1, false);

            // Both ids are placed unquoted into the statement, so force them
            // to integers.
            mysql_query(
                "update Message65 set Category=" . (int) $cat_id
                . " Where Message_ID=" . (int) $deal['Message_ID']
            );
        }

        $exec_time = get_sec() - $start_time;
        echo "exec time : $exec_time sec.";
    }

    /**
     * Returns the details of the last execute() run in serialized form.
     *
     * @return string serialize()d copy of self::$info.
     */
    public static function dump(){
        return serialize(self::$info);
    }
}