/**
 * DOMDocument that pre-cleans HTML before parsing it.
 *
 * loadHTML() converts the input to UTF-8, removes script/style/noscript
 * elements, normalises the markup to XHTML with the tidy extension and only
 * then passes the result to DOMDocument::loadHTML().
 */
class DOMHtml extends DOMDocument
{
    /**
     * Clean up an HTML string and load it into this document.
     *
     * Every clean-up step is best-effort: when a step fails, the text produced
     * by the previous step is kept instead of silently becoming empty.
     *
     * @param string $html     Raw HTML markup.
     * @param string $encoding Character set of $html as understood by iconv().
     *
     * @return bool Result of DOMDocument::loadHTML() on the cleaned markup.
     */
    public function loadHTML($html, $encoding = 'utf-8')
    {
        // iconv() returns false for an unknown charset or an unconvertible
        // sequence; fall back to the raw input rather than parsing nothing.
        $converted = @iconv($encoding, 'UTF-8//TRANSLIT', $html);
        if ($converted !== false) {
            $html = $converted;
        }

        // Remove script/style/noscript elements together with their content.
        $html = self::replaceOrKeep(
            '/<(script|style|noscript)\b[^>]*>.*?<\/\1\b[^>]*>/is',
            '',
            $html
        );

        $config = array(
            'drop-font-tags'              => true,
            'drop-proprietary-attributes' => true,
            'hide-comments'               => true,
            'indent'                      => true,
            'logical-emphasis'            => true,
            'numeric-entities'            => true,
            'output-xhtml'                => true,
            'wrap'                        => 0
        );

        $tidy = new tidy;
        if ($tidy->parseString($html, $config, 'utf8')) {
            $tidy->cleanRepair();
            $repaired = $tidy->value;
            // Keep the un-tidied markup if tidy produced no output.
            if (is_string($repaired) && $repaired !== '') {
                $html = $repaired;
            }
        }

        // NOTE(review): the two patterns that used to follow were '#]+>#isu'
        // and '#]*>#isu' (replacement "\r\n"). The second one matches every
        // bare '>' and therefore destroyed all tags. They look like
        // '<tag[^>]…>' patterns whose leading part was lost; they are
        // reconstructed here as "drop charset declarations, then declare
        // UTF-8 right after <head>" so the parser reads the text as UTF-8.
        // Confirm against the upstream version of this class.
        $html = self::replaceOrKeep('#<meta\b[^>]*\bcharset\b[^>]*>#isu', '', $html);
        $html = self::replaceOrKeep(
            '#<head\b[^>]*>#isu',
            '$0' . "\r\n" . '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            $html,
            1
        );

        // Parser warnings about imperfect markup are deliberately suppressed.
        return @parent::loadHTML($html);
    }

    /**
     * preg_replace() wrapper that returns the subject unchanged on a PCRE error.
     *
     * @param string $pattern     Regular expression.
     * @param string $replacement Replacement string.
     * @param string $subject     Text to process.
     * @param int    $limit       Maximum number of replacements (-1 = unlimited).
     *
     * @return string Processed text, or $subject when preg_replace() returned null.
     */
    private static function replaceOrKeep($pattern, $replacement, $subject, $limit = -1)
    {
        $result = preg_replace($pattern, $replacement, $subject, $limit);

        return $result !== null ? $result : $subject;
    }
}