#include "html_optimizer.hpp"
using namespace std;
/**
 * Neutralises every occurrence of a query-string argument name inside `text`.
 *
 * Each occurrence of `name` (exactly `arg_len` bytes, case-sensitive) that is
 * directly preceded by ';', '&' or '?' gets its first byte overwritten with
 * '_'. The buffer is modified in place; its length never changes.
 *
 * @param text    buffer to scan (`text.value`, `text.length` bytes).
 * @param name    argument name to look for.
 * @param arg_len length of `name` in bytes; 0 is a no-op.
 */
static inline void damage_url_arg(t_str &text, const char *name, unsigned short arg_len) {
    // An empty needle makes memmem() match at every position without ever
    // advancing the haystack, which would loop forever.
    if (arg_len == 0)
        return;
    char *mem, *haystack = text.value;
    while ((mem = (char *) memmem(haystack, text.length - (haystack - text.value), name, arg_len))) {
        // A match at the very first byte has no preceding separator; looking
        // at mem[-1] there would read before the start of the buffer.
        if (mem > text.value && (mem[-1] == ';' || mem[-1] == '&' || mem[-1] == '?')) // TODO: check end of arg
            *mem = '_';
        haystack = mem + arg_len;
    }
}
/**
 * Classifies an absolute http/https URL against the `allowed_domains` list.
 *
 * @param url    URL bytes (need not be NUL-terminated).
 * @param length number of bytes in `url`.
 * @return -1 when the URL is too short or does not start with "http://" or
 *         "https://" (case-insensitive);
 *          1 when the host equals an entry of `allowed_domains` or is a
 *         subdomain of one (entry preceded by '.');
 *          0 otherwise.
 *
 * NOTE(review): the return type is plain `char`; on platforms where `char` is
 * unsigned the -1 result cannot be told apart by a `> -1` comparison. Confirm
 * the target platforms before relying on it.
 */
char is_allowed_url(char *url, unsigned int length) {
    if (length < 8)
        return -1;

    unsigned int start_domain = 0;
    if (strncasecmp(url, "http://", 7) == 0) {
        start_domain = 7;
        if (length < 10)
            return -1;
    } else if (strncasecmp(url, "https://", 8) == 0) {
        start_domain = 8;
        if (length < 11)
            return -1;
    } else
        return -1;

    // Walk the authority component. The host ends at the first ':' (port),
    // the authority ends at '/', '?' or '#'.
    unsigned int i = start_domain;
    unsigned int host_end = 0;
    bool port_seen = false;
    for (; i < length; ++i) {
        const char c = url[i];
        if (c == '#' || c == '/' || c == '?')
            break;
        // "http://allowed.example:x@evil.example/" really points at
        // evil.example: everything before '@' is userinfo. Such URLs are never
        // vouched for as internal.
        if (c == '@')
            return 0;
        if (c == ':' && !port_seen) {
            port_seen = true;
            host_end = i;
        }
    }
    if (!port_seen)
        host_end = i;

    const unsigned int host_len = host_end - start_domain;
    for (unsigned int j = 0; j < ARRAY_SIZE(allowed_domains); ++j) {
        const unsigned int domain_len = allowed_domains[j].length;
        if (domain_len > host_len)
            continue; // host is shorter than this entry, cannot match
        if (domain_len == host_len) {
            // Exact host match.
            if (strncasecmp(allowed_domains[j].domain, url + start_domain, domain_len) == 0)
                return 1;
        } else {
            // Subdomain match: host ends with ".<entry>".
            const unsigned int suffix_at = host_end - domain_len;
            if (strncasecmp(allowed_domains[j].domain, url + suffix_at, domain_len) == 0 && url[suffix_at - 1] == '.')
                return 1;
        }
    }
    return 0;
}
/**
 * Percent-encodes `text` for use inside a URL query value.
 *
 * Letters, digits and "-_.~" are copied unchanged, a space becomes '+', every
 * other byte becomes "%HH". When `html_unwrap` is true, the HTML entities
 * "&amp;", "&quot;", "&apos;", "&lt;", "&gt;" (case-insensitive) and the
 * numeric forms "&#HH;" / "&#DDD;" are decoded first, so the character they
 * stand for is what gets percent-encoded.
 *
 * @param text        input bytes (need not be NUL-terminated).
 * @param size        number of bytes in `text`.
 * @param p_length    receives the number of bytes written to the result.
 * @param html_unwrap decode HTML entities before encoding.
 * @return a malloc()ed buffer (not NUL-terminated) that the caller must
 *         free(), or NULL with *p_length == 0 when allocation fails.
 */
char *url_escape_html_encoded(const char *text, unsigned int size, unsigned int *p_length, bool html_unwrap = false) {
    // Named entities (without the leading '&') and the two hex digits of the
    // character each one represents.
    static const struct {
        const char *name;
        unsigned char len;
        char hi, lo;
    } entities[] = {
        {"amp;",  4, '2', '6'},
        {"quot;", 5, '2', '2'},
        {"apos;", 5, '2', '7'},
        {"lt;",   3, '3', 'C'},
        {"gt;",   3, '3', 'E'},
    };

    // Every input byte expands to at most three output bytes. The +1 keeps the
    // request non-zero for empty input.
    char *new_buff = (char *) malloc((size_t) size * 3 + 1);
    if (!new_buff) {
        *p_length = 0;
        return NULL;
    }

    unsigned int j = 0;
    unsigned int i = 0;
    while (i < size) {
        // ctype functions require a value representable as unsigned char.
        const unsigned char ch = (unsigned char) text[i];

        if (html_unwrap && ch == '&') {
            const char *ent = text + i + 1;        // first byte after '&'
            const unsigned int left = size - i - 1; // bytes available after '&'
            unsigned int consumed = 0;              // input bytes covered by the entity, 0 = none
            char hi = 0, lo = 0;

            for (size_t k = 0; k < sizeof(entities) / sizeof(entities[0]) && !consumed; ++k) {
                if (left >= entities[k].len && strncasecmp(ent, entities[k].name, entities[k].len) == 0) {
                    hi = entities[k].hi;
                    lo = entities[k].lo;
                    consumed = 1u + entities[k].len; // '&' + name + ';'
                }
            }
            // "&#HH;" - the two digits are passed through as the hex code.
            // NOTE(review): HTML defines "&#NN;" as decimal (hex is "&#xNN;");
            // the pre-existing hex interpretation is kept here - confirm intent.
            if (!consumed && left >= 4 && ent[0] == '#' &&
                isxdigit((unsigned char) ent[1]) && isxdigit((unsigned char) ent[2]) && ent[3] == ';') {
                hi = ent[1];
                lo = ent[2];
                consumed = 5;
            }
            // "&#DDD;" - three decimal digits. Values above 255 do not fit one
            // byte and are left to be escaped literally.
            if (!consumed && left >= 5 && ent[0] == '#' &&
                isdigit((unsigned char) ent[1]) && isdigit((unsigned char) ent[2]) &&
                isdigit((unsigned char) ent[3]) && ent[4] == ';') {
                const unsigned int code = (ent[1] - '0') * 100 + (ent[2] - '0') * 10 + (ent[3] - '0');
                if (code <= 0xFF) {
                    hi = hex_digits[code >> 4 & 0xF];
                    lo = hex_digits[code & 0xF];
                    consumed = 6;
                }
            }
            if (consumed) {
                new_buff[j++] = '%';
                new_buff[j++] = hi;
                new_buff[j++] = lo;
                i += consumed;
                continue;
            }
            // Not a recognised entity: fall through and escape '&' literally.
        }

        if (ch == ' ') {
            new_buff[j++] = '+';
        } else if (isdigit(ch) || isalpha(ch) || ch == '-' || ch == '_' || ch == '.' || ch == '~') {
            new_buff[j++] = (char) ch;
        } else {
            new_buff[j++] = '%';
            new_buff[j++] = hex_digits[ch >> 4 & 0xF];
            new_buff[j++] = hex_digits[ch & 0xF];
        }
        ++i;
    }
    *p_length = j;
    return new_buff;
}
/**
 * Locates an attribute and its value inside a tag.
 *
 * Scans text[offset, end) for `attr_name` (case-insensitive, no word-boundary
 * check), followed by optional whitespace, '=', optional whitespace and a
 * value that is either quoted with ' or " or unquoted (terminated by
 * whitespace or by the last byte before `end`).
 *
 * Results are indices into `text`:
 *   - *p_attr_start / *p_attr_size             : from the attribute name up to
 *     and including the closing quote (quoted) or up to the value end (unquoted);
 *   - *p_attr_value_start / *p_attr_value_size : the value without quotes.
 * Each pair is written only when both of its pointers are non-NULL. When the
 * attribute is not found, or any computed index is 0 or beyond `end`, the
 * pairs are set to 0. Index 0 doubles as "not found", so an attribute at
 * offset 0 is never reported.
 *
 * @param text      buffer containing the tag.
 * @param offset    index where scanning starts.
 * @param end       index one past the last byte to consider.
 * @param attr_name attribute name to search for.
 * @param attr_len  length of `attr_name`.
 */
void find_attr(
    const char *text, unsigned int offset, unsigned int end,
    const char *attr_name, unsigned char attr_len,
    unsigned int *p_attr_start = NULL, unsigned int *p_attr_size = NULL,
    unsigned int *p_attr_value_start = NULL, unsigned int *p_attr_value_size = NULL
) {
    // States: 0 = looking for the name, 1 = expecting '=', 2 = expecting the
    // value start, 3 = inside the value, 4 = value finished.
    unsigned char state = 0;
    unsigned int i = offset;
    char incapsulate_char = 0; // quote character of the value, 0 when unquoted
    unsigned int attr_start = 0;
    unsigned int attr_end = 0;
    unsigned int attr_value_start = 0;
    unsigned int attr_value_end = 0;

    while (true) {
        switch (state) {
            case 0:
                // Only compare when the whole name fits before `end`; otherwise
                // strncasecmp would read bytes outside the scanned range.
                if (i + attr_len <= end && strncasecmp(&text[i], attr_name, attr_len) == 0) {
                    state = 1;
                    attr_start = i;
                    i += attr_len;
                } else
                    ++i;
                break;
            case 1:
                if (text[i] == '=')
                    state = 2;
                else if (!IS_SPACE(text[i]))
                    state = 0; // the name was part of something else, keep searching
                ++i;
                break;
            case 2:
                if (text[i] == '"' || text[i] == '\'') {
                    incapsulate_char = text[i];
                    state = 3;
                    ++i;
                    attr_value_start = i;
                } else if (!IS_SPACE(text[i])) {
                    state = 3;
                    attr_value_start = i;
                } else
                    ++i;
                break;
            case 3:
                if (
                    (incapsulate_char && text[i] == incapsulate_char) ||
                    (!incapsulate_char && (IS_SPACE(text[i]) || i + 1 == end))
                ) {
                    state = 4;
                    if (incapsulate_char)
                        ++i; // step over the closing quote
                } else if (!attr_start) {
                    attr_start = i;
                    ++i;
                } else
                    ++i;
                break;
        }
        if (i >= end || state == 4) {
            if (attr_start)
                attr_end = i;
            if (incapsulate_char && i <= end)
                attr_value_end = i - 1; // index of the closing quote (or end - 1 when unterminated)
            else if (!incapsulate_char && state == 4)
                // An unquoted value stops where the scan stopped. Using end - 1
                // here would swallow every following attribute into the value.
                attr_value_end = i;
            else
                attr_value_end = end - 1;
            break;
        }
    }

    const bool found =
        attr_start > 0 && attr_end > 0 && attr_end <= end &&
        attr_value_start > 0 && attr_value_end > 0 && attr_value_end <= end;

    if (p_attr_start && p_attr_size) {
        *p_attr_start = found ? attr_start : 0;
        *p_attr_size = found ? attr_end - attr_start : 0;
    }
    if (p_attr_value_start && p_attr_value_size) {
        *p_attr_value_start = found ? attr_value_start : 0;
        *p_attr_value_size = found ? attr_value_end - attr_value_start : 0;
    }
}
// String helper functions
//
// REPLACE_CONCAT_BLOCK2(var_name): when `<var_name>_len` is non-zero, copies
// that many bytes from `<var_name>` to `s.value + _offset` and advances
// `_offset` by the same amount. The expansion site must have a t_str `s` and
// an integer `_offset` in scope, plus the variable pair `<var_name>` /
// `<var_name>_len`.
#define REPLACE_CONCAT_BLOCK2(var_name) \
if (var_name ## _len) { \
memcpy(s.value + _offset, var_name, var_name ## _len); \
_offset += var_name ## _len; \
}
/**
 * Replaces `len` bytes of `s` starting at `start` with the concatenation of up
 * to six fragments (text0..text5); fragments with zero length are skipped.
 *
 * The tail of the string is shifted to make room or close the gap, and the
 * buffer is grown through mem_realloc() when the result would not fit.
 *
 * @return the change in length (new length minus old length), which is also
 *         added to `s.length`.
 */
inline int str_replace (
    t_str &s, int start, int len,
    const char *text0 = NULL, unsigned short text0_len = 0,
    const char *text1 = NULL, unsigned short text1_len = 0,
    const char *text2 = NULL, unsigned short text2_len = 0,
    const char *text3 = NULL, unsigned short text3_len = 0,
    const char *text4 = NULL, unsigned short text4_len = 0,
    const char *text5 = NULL, unsigned short text5_len = 0
) {
    const int replace_length = text0_len + text1_len + text2_len + text3_len + text4_len + text5_len;
    const int end = start + len;
    const int delta = replace_length - len;

    if (delta > 0) {
        // Growing: make sure the buffer is large enough, then push the tail right.
        if (s.allocated <= s.length + delta) {
            s.allocated = s.length + delta;
            s.value = (char *) mem_realloc(s.value, s.allocated);
        }
        memmove(s.value + (start + replace_length), s.value + end, s.length - end);
    } else if (delta < 0) {
        // Shrinking: pull the tail left over the bytes that disappear.
        memmove(s.value + (end + delta), s.value + end, s.length - end);
    }

    // Write the fragments back to back into the gap.
    unsigned int _offset = start;
    REPLACE_CONCAT_BLOCK2(text0);
    REPLACE_CONCAT_BLOCK2(text1);
    REPLACE_CONCAT_BLOCK2(text2);
    REPLACE_CONCAT_BLOCK2(text3);
    REPLACE_CONCAT_BLOCK2(text4);
    REPLACE_CONCAT_BLOCK2(text5);

    s.length += delta;
    return delta;
}
/**
 * Appends `len` bytes from `value` to the end of `s`, growing the buffer via
 * mem_realloc() to exactly the new length when it would not otherwise fit.
 */
inline void str_append(t_str &s, const char *value, unsigned int len) {
    const bool must_grow = !(s.length + len < s.allocated);
    if (must_grow) {
        s.allocated = s.length + len;
        s.value = (char *) mem_realloc(s.value, s.allocated);
    }
    char *tail = s.value + s.length;
    memcpy(tail, value, len);
    s.length += len;
}
/**
 * Removes the bytes in [start, end) from `s` by moving the tail down.
 *
 * @return the number of bytes removed (end - start).
 */
inline int str_cut(t_str &s, unsigned int start, unsigned int end) {
    const unsigned int removed = end - start;
    char *base = s.value;
    memmove(base + start, base + end, s.length - end);
    s.length -= removed;
    return removed;
}
/**
 * Trims the buffer of `s` to its content: when the recorded capacity differs
 * from the length, reallocates to length + 1 bytes and records the capacity as
 * `s.length`.
 */
inline void str_normalize(t_str &s) {
    if (s.allocated == s.length)
        return; // already tight, nothing to do
    s.value = (char *) mem_realloc(s.value, s.length + 1);
    s.allocated = s.length;
}
/**
 * Returns the time elapsed between two timestamps, in milliseconds.
 *
 * @param start_time earlier timestamp.
 * @param end_time   later timestamp.
 */
inline float get_time_delta(struct timeval &start_time, struct timeval &end_time) {
    int whole_seconds = end_time.tv_sec - start_time.tv_sec;
    int micro_part = end_time.tv_usec - start_time.tv_usec;
    if (micro_part < 0) {
        // Borrow one second so the microsecond part is non-negative.
        --whole_seconds;
        micro_part = 1000000 - start_time.tv_usec + end_time.tv_usec;
    }
    const float ms_from_seconds = (float) whole_seconds * 1000;
    const float ms_from_micros = (float) micro_part / 1000;
    return (double) (ms_from_seconds + ms_from_micros);
}
/**
 * Sanitises and slims down an HTML fragment.
 *
 * Phase 1 walks the input once and records, in `to_replace`, what has to
 * happen to each tag / comment / stray '<' (cut, replace with <br>, escape,
 * rewrite a link). Phase 2 applies those edits front to back while tracking
 * the accumulated length change in `offset`. Phase 3 (FLAGS_CLEAR_SPACES)
 * collapses runs of whitespace / non-breaking-space entities into one '\n'.
 * Finally, closing tags are appended for allowed tags that were left open.
 *
 * @param text_val input bytes.
 * @param text_len number of bytes in `text_val`.
 * @param flags    FLAGS_DEBUG prints per-phase timings to stderr;
 *                 FLAGS_CLEAR_SPACES enables phase 3.
 * @param dup      true: work on a copy allocated with mem_alloc();
 *                 false: work on `text_val` itself, which may then be
 *                 reallocated through mem_realloc().
 * @return the resulting string; the caller owns `value`.
 */
t_str optimize_html(char *text_val, unsigned int text_len, unsigned int flags, bool dup) {
    t_str text;
    text.length = text_len;
    text.allocated = text.length;
    if (dup) {
        text.value = (char *) mem_alloc(text.allocated);
        memcpy(text.value, text_val, text.length);
    } else {
        text.value = text_val;
    }

    vector<ReplaceTag> to_replace;     // edits to apply, in increasing position order
    vector<unsigned char> opened_tags; // ids of allowed tags opened and not yet closed

    struct timeval start_time;
    struct timeval end_time;

    if ((flags & FLAGS_DEBUG))
        gettimeofday(&start_time, NULL);

    unsigned int start_tag_offset = 0, end_tag_offset = 0, start_wrap = 0, end_wrap = 0;

    // State for cutting out a tag together with its whole content.
    unsigned char wait_closure_tag_id = NULL_TAG_ID;
    unsigned int wait_closure_tag_index = 0;

    char tmp_tag_name[16];
    bool last_tag_brk = true;
    unsigned char br_cnt = 0;
    unsigned char html_tok_type = TOK_UNK;
    unsigned char current_tag_id;

    // ---- Phase 1: analyse ------------------------------------------------
    for (unsigned int i = 0; i < text.length; ) {
        if (text.value[i] == '<') { // start of a tag
            start_tag_offset = i++;
            if (i >= text.length) continue;

            // A run of '<' characters: only the last one can start a tag.
            if (text.value[i] == '<') {
                start_wrap = i - 1;
                ++i;
                while (i < text.length && text.value[i] == '<') ++i;
                if (i >= text.length) continue;
                end_wrap = i - 1;
                start_tag_offset = i - 1;
                // Escape the surplus '<' characters - but not while we are
                // inside a region that will be cut as a whole: an edit nested
                // in that region would be applied with a stale offset.
                if (wait_closure_tag_id == NULL_TAG_ID) {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_HTML_WRAP, start_wrap, end_wrap);
                }
            }

            // Is this a closing tag?
            bool is_closure_tag = false;
            if (text.value[i] == '/') {
                is_closure_tag = true;
                ++i;
            }
            if (i >= text.length) continue;

            // '!' directly after '<'
            if (fast_cmp2(&text.value[i - 1], "<!")) {
                if (i + 2 < text.length && fast_cmp2(&text.value[i + 1], "--")) { // comment <!--
                    i += 2;
                    html_tok_type = TOK_COMMENT;
                } else if (i + 8 < text.length && strncasecmp(&text.value[i], "![CDATA[", 8) == 0) {
                    i += 8;
                    html_tok_type = TOK_CDATA;
                } else { // no "--" after '!': treat it as a DOCTYPE
                    ++i;
                    html_tok_type = TOK_DOCTYPE;
                }
            } else if (fast_cmp2(&text.value[i - 1], "<?")) { // <?xml and friends
                i += 2;
                // "<?" as the last two bytes would leave i one past the end
                // and later make str_cut() compute a negative tail size.
                if (i > text.length)
                    i = text.length;
                html_tok_type = TOK_COMMENT2;
            } else
                html_tok_type = TOK_TAG;

            if (html_tok_type == TOK_TAG) { // a regular tag
                // Read the tag name.
                unsigned int start_name = i;
                while (i < text.length && VALIDATE_TAG_NAME(text.value[i]))
                    ++i;
                unsigned int end_name = i;

                if (end_name - start_name < 1) continue;

                bool is_attr_opened = false;
                char attr_open = 0;

                // Find the end of the tag, skipping '>' inside quoted values.
                bool is_self_closed = false;
                bool is_valid_tag = false;
                while (true) {
                    if (i >= text.length)
                        break;
                    if (!is_attr_opened && text.value[i] == '=') // an attribute value follows
                        is_attr_opened = true;
                    if (!attr_open && text.value[i] == '>') {
                        ++i;
                        is_valid_tag = true;
                        break;
                    }
                    if (is_attr_opened) {
                        if (!attr_open && (text.value[i] == '\'' || text.value[i] == '"'))
                            attr_open = text.value[i];
                        else if (attr_open && text.value[i] == attr_open) {
                            attr_open = 0;
                            is_attr_opened = false;
                        }
                    }
                    // Self-closed only if '/' is the last non-space byte before '>'.
                    if (!is_self_closed && text.value[i] == '/')
                        is_self_closed = true;
                    else if (is_self_closed && !IS_SPACE(text.value[i]))
                        is_self_closed = false;
                    ++i;
                }
                end_tag_offset = i;

                unsigned char tag_name_size = end_name - start_name;
                const char *p_tag_name = text.value + start_name;

                current_tag_id = 0;
                if (is_valid_tag) {
                    if (tag_name_size <= sizeof(tmp_tag_name)) {
                        // Lower-case the tag name.
                        for (int j = 0; j < tag_name_size; ++j)
                            tmp_tag_name[j] = tolower(p_tag_name[j]);

                        // Look up the tag id.
                        for (unsigned char tag_id = 0; tag_id < ARRAY_SIZE(html_tags); ++tag_id) {
                            if (html_tags[tag_id].len == tag_name_size && memcmp(tmp_tag_name, html_tags[tag_id].name, html_tags[tag_id].len) == 0) {
                                current_tag_id = tag_id;
                                break;
                            }
                        }
                    }
                }
                const HtmlTags &tag = html_tags[current_tag_id];

                // Currently cutting the content of some tag: only its closing
                // tag is of interest.
                if (wait_closure_tag_id != NULL_TAG_ID) {
                    if (wait_closure_tag_id == current_tag_id && is_closure_tag) {
                        ReplaceTag &r = to_replace[wait_closure_tag_index];
                        r.end = end_tag_offset;
                        wait_closure_tag_id = NULL_TAG_ID;
                    }
                    continue;
                }

                // Malformed tag (no closing '>'): cut it.
                if (!is_valid_tag) {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                    continue;
                }

                // Tag whose whole content must be removed: cut up to the end of
                // the text for now; the end is corrected when the closing tag
                // is found.
                if ((tag.flags & TAG_REPLACE_INNERS) && !is_closure_tag) {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_NONE, start_tag_offset, text.length);
                    wait_closure_tag_id = current_tag_id;
                    wait_closure_tag_index = to_replace.size() - 1;
                    continue;
                }

                // Tag that is simply dropped.
                if ((tag.flags & TAG_SKIP)) {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                    continue;
                }

                // Self-closed, but this tag is not allowed to be: cut it.
                if (!(tag.flags & TAG_ALLOW_SELF_CLOSURE) && is_self_closed) {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                    continue;
                }

                // Line-break and block tags: emit a limited number of <br>.
                if ((tag.flags & (TAG_IS_BREAK | TAG_IS_BLOCK))) {
                    bool br = false;
                    if ((tag.flags & TAG_IS_BREAK) && br_cnt <= MAX_BREAKS)
                        ++br_cnt;

                    if ((tag.flags & TAG_IS_BREAK) && last_tag_brk) {
                        br = br_cnt <= MAX_BREAKS - 1;
                    } else if ((tag.flags & TAG_IS_BLOCK) && br_cnt > 1) {
                        br = br_cnt <= MAX_BREAKS - 1;
                        br_cnt = 0;
                    } else if (!last_tag_brk) {
                        br = br_cnt <= MAX_BREAKS;
                    }

                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, !br ? R_NONE : R_BREAK, start_tag_offset, end_tag_offset);

                    if ((tag.flags & TAG_IS_BLOCK))
                        last_tag_brk = true;
                    continue;
                }

                // Keep only the (escaped) alt text of the tag.
                if ((tag.flags & TAG_ALT)) {
                    unsigned int attr_size = 0, attr_start = 0;
                    find_attr(text.value, end_name, end_tag_offset, "alt", 3, NULL, NULL, &attr_start, &attr_size);
                    if (attr_size) {
                        ReplaceTag &r0 = new_vector_item(to_replace);
                        NEW_REPLACE(r0, R_NONE, start_tag_offset, attr_start);

                        ReplaceTag &r2 = new_vector_item(to_replace);
                        NEW_REPLACE(r2, R_HTML_WRAP, attr_start, attr_start + attr_size);

                        ReplaceTag &r1 = new_vector_item(to_replace);
                        NEW_REPLACE(r1, R_NONE, attr_start + attr_size, end_tag_offset);

                        if (!(tag.flags & (TAG_IS_BLOCK | TAG_IS_BREAK))) {
                            last_tag_brk = false;
                            br_cnt = 0;
                        }
                    } else {
                        // No alt text: the tag disappears (an R_IMG_TAG
                        // placeholder was disabled in the original code).
                        ReplaceTag &r = new_vector_item(to_replace);
                        NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                    }
                    continue;
                }

                if ((tag.flags & TAG_IS_ALLOWED)) {
                    if (is_closure_tag) {
                        // A closing tag is kept only if the tag was opened before.
                        bool is_not_opened_tag = true;
                        for (unsigned int k = 0; k < opened_tags.size(); ++k) {
                            if (current_tag_id == opened_tags[k]) {
                                opened_tags.erase(opened_tags.begin() + k);
                                is_not_opened_tag = false;
                                break;
                            }
                        }

                        if (is_not_opened_tag) { // never opened: drop it
                            ReplaceTag &r = new_vector_item(to_replace);
                            NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                            continue;
                        }
                    }

                    bool is_normal_tag = true;
                    if ((tag.flags & (TAG_SAVE_ATTRS)) && !is_closure_tag) { // some attributes survive
                        if (current_tag_id == TAG_A) {
                            unsigned int attr_size = 0, attr_start = 0, attr_value_start = 0, attr_value_size = 0;
                            find_attr(text.value, end_name, end_tag_offset, "href", 4, &attr_start, &attr_size, &attr_value_start, &attr_value_size);

                            char allowed;
                            if ((allowed = is_allowed_url(text.value + attr_value_start, attr_value_size)) > -1) {
                                // Cut everything between the tag name and href.
                                if (attr_start > end_name + 1) {
                                    ReplaceTag &r = new_vector_item(to_replace);
                                    NEW_REPLACE(r, R_NONE, end_name + 1, attr_start);
                                }

                                if (allowed == 0) {
                                    ReplaceTag &r2 = new_vector_item(to_replace);
                                    NEW_REPLACE(r2, R_EXTERNAL_LINK, attr_value_start, attr_value_start + attr_value_size);
                                } else {
                                    ReplaceTag &r2 = new_vector_item(to_replace);
                                    NEW_REPLACE(r2, R_INTERNAL_LINK, attr_value_start, attr_value_start + attr_value_size);
                                }

                                // Cut everything between href and the closing '>'.
                                if (attr_start + attr_size < end_tag_offset - 1) {
                                    ReplaceTag &r = new_vector_item(to_replace);
                                    NEW_REPLACE(r, R_NONE, attr_start + attr_size, end_tag_offset - 1);
                                }

                                if (!(tag.flags & (TAG_IS_BLOCK | TAG_IS_BREAK))) {
                                    last_tag_brk = false;
                                    br_cnt = 0;
                                }
                            } else {
                                // No usable href: the link tag is removed.
                                ReplaceTag &r = new_vector_item(to_replace);
                                NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                                is_normal_tag = false;
                            }
                        }
                    } else {
                        // Strip all attributes, keep "<name>" / "</name>".
                        ReplaceTag &r = new_vector_item(to_replace);
                        NEW_REPLACE(r, R_NONE, end_name, end_tag_offset - 1);

                        if (!(tag.flags & (TAG_IS_BLOCK | TAG_IS_BREAK))) {
                            last_tag_brk = false;
                            br_cnt = 0;
                        }
                    }

                    if (!is_closure_tag && is_normal_tag)
                        opened_tags.push_back(current_tag_id);
                } else {
                    ReplaceTag &r = new_vector_item(to_replace);
                    NEW_REPLACE(r, R_NONE, start_tag_offset, end_tag_offset);
                }
            } else if (wait_closure_tag_id == NULL_TAG_ID) { // comment, doctype, CDATA, <? ... >
                if (html_tok_type == TOK_CDATA) {
                    const char *comm_end = (const char *) memmem((text.value + i), text.length - i, "]]>", 3);
                    i = comm_end ? (comm_end - text.value) + 3 : text.length;
                } else if (html_tok_type == TOK_COMMENT) {
                    // Find the end of the comment.
                    const char *comm_end = (const char *) memmem((text.value + i), text.length - i, "-->", 3);
                    i = comm_end ? (comm_end - text.value) + 3 : text.length;
                } else if (html_tok_type == TOK_DOCTYPE) {
                    // Find the end of <!DOCTYPE
                    while (i < text.length && text.value[i++] != '>');
                } else if (html_tok_type == TOK_COMMENT2) {
                    // Find the end of <?
                    while (i < text.length && text.value[i++] != '>');
                }
                const unsigned int token_end = i;

                ReplaceTag &r = new_vector_item(to_replace);
                NEW_REPLACE(r, R_NONE, start_tag_offset, token_end);
            }
        } else {
            // Plain text: any visible character ends a "just had a break" state.
            if (last_tag_brk && wait_closure_tag_id == NULL_TAG_ID) {
                if (IS_HTML_SPACE(text.value, i, text.length)) { i += 6; continue; }
                if (!IS_SPACE(text.value[i])) {
                    last_tag_brk = false;
                    br_cnt = 0;
                }
            }
            ++i;
        }
    }

    if ((flags & FLAGS_DEBUG)) {
        gettimeofday(&end_time, NULL);
        fprintf(stderr, "\nDEBUG: Analize HTML: %.4f ms\n", get_time_delta(start_time, end_time));
        gettimeofday(&start_time, NULL);
    }

    // ---- Phase 2: apply the recorded edits --------------------------------
    // `offset` is the accumulated length change caused by the edits applied so
    // far; recorded positions refer to the original text.
    int offset = 0;
    for (unsigned int n = 0; n < to_replace.size(); ++n) {
        ReplaceTag &r = to_replace[n];
        if (r.id == R_NONE) {
            offset -= str_cut(text, offset + r.start, offset + r.end);
        } else if (r.id == R_BREAK) {
            offset += str_replace(text, offset+r.start, r.end - r.start, const_str_len("<br>"));
        } else if (r.id == R_IMG_TAG) {
            offset += str_replace(text, offset+r.start, r.end - r.start, const_str_len("[img]"));
        } else if (r.id == R_EXTERNAL_LINK) {
            // Route the link through the redirect, passing the escaped URL.
            unsigned int length = 0;
            char *new_buff = url_escape_html_encoded(text.value + offset + r.start, r.end - r.start, &length);
            offset += str_replace(
                text, offset+r.start, r.end - r.start,
                const_str_len(EXTERNAL_LINK_REDIRECT),
                new_buff, length
            );
            free(new_buff);
        } else if (r.id == R_INTERNAL_LINK) {
            // NOTE(review): this scans the whole document, not only the link
            // that triggered it - confirm whether that is intended.
            damage_url_arg(text, const_str_len("xyz"));
        } else if (r.id == R_HTML_WRAP) {
            // HTML-escape '<', '>' and '"' in the range. '&' is deliberately
            // left alone (its case was disabled in the original code).
            unsigned int src = r.start + offset, j = 0;
            const char *replace_to = NULL; unsigned char replace_len = 0;
            // Worst case every byte becomes the 6-byte "&quot;".
            char *tmp = new char[(r.end - r.start) * 6];
            while (src < (offset + r.end)) {
                switch (text.value[src]) {
                    case '<': replace_to = "&lt;"; replace_len = 4; break;
                    case '>': replace_to = "&gt;"; replace_len = 4; break;
                    case '"': replace_to = "&quot;"; replace_len = 6; break;
                    default: break;
                }

                if (replace_to) {
                    memcpy(tmp + j, replace_to, replace_len);
                    j += replace_len;
                    replace_to = NULL;
                } else
                    tmp[j++] = text.value[src];
                ++src;
            }
            offset += str_replace(text, offset + r.start, r.end - r.start, tmp, j);
            delete[] tmp;
        }
    }

    if ((flags & FLAGS_DEBUG)) {
        gettimeofday(&end_time, NULL);
        fprintf(stderr, "\nDEBUG: Optimize html: %.4f ms\n", get_time_delta(start_time, end_time));
    }

    // ---- Phase 3: collapse whitespace -------------------------------------
    if ((flags & FLAGS_CLEAR_SPACES)) {
        if ((flags & FLAGS_DEBUG))
            gettimeofday(&start_time, NULL);

        // A run longer than one byte (whitespace and/or the 6-byte space
        // entity) is replaced by a single '\n'. The loop runs one step past
        // the end so that a trailing run is flushed as well.
        int last_space_sym = -1; // start of the current run, -1 = not in a run
        for (unsigned int i = 0; i <= text.length; ++i) {
            if (i < text.length && IS_SPACE(text.value[i])) {
                if (last_space_sym == -1)
                    last_space_sym = i;
            } else if (i < text.length && IS_HTML_SPACE(text.value, i, text.length)) {
                if (last_space_sym == -1)
                    last_space_sym = i;
                i += 5; // together with ++i this skips the whole entity
            } else if (last_space_sym != -1) {
                if (i - last_space_sym > 1) {
                    text.value[last_space_sym] = '\n';
                    last_space_sym += 1;
                    i -= str_cut(text, last_space_sym, i);
                }
                last_space_sym = -1;
            }
        }

        if ((flags & FLAGS_DEBUG)) {
            gettimeofday(&end_time, NULL);
            fprintf(stderr, "\nDEBUG: Strip whitespaces: %.4f ms\n", get_time_delta(start_time, end_time));
        }
    }

    str_normalize(text);

    if ((flags & FLAGS_DEBUG))
        gettimeofday(&start_time, NULL);

    // ---- Close tags that were left open -----------------------------------
    // Innermost (most recently opened) first, so the result nests properly.
    for (unsigned int k = opened_tags.size(); k-- > 0; ) {
        const HtmlTags &open_tag = html_tags[opened_tags[k]];
        str_append(text, const_str_len("</"));
        str_append(text, open_tag.name, open_tag.len);
        str_append(text, const_str_len(">"));
    }

    if ((flags & FLAGS_DEBUG)) {
        gettimeofday(&end_time, NULL);
        fprintf(stderr, "\nDEBUG: Autocomplete tags: %.4f ms\n", get_time_delta(start_time, end_time));
    }

    return text;
}
#ifndef PERL_API_VERSION
/**
 * Command-line front end: reads the whole input file, runs optimize_html()
 * on it with FLAGS_DEBUG | FLAGS_CLEAR_SPACES and writes the result to the
 * output file (or stdout when the output argument is missing or "-").
 *
 * @return 0 on success, 1 on usage, I/O or allocation errors.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("usage: %s <input_file> [output_file]\n", argv[0]);
        return 1;
    }

    FILE *fp_in = fopen(argv[1], "r");
    if (!fp_in) {
        perror("Open input file");
        return 1;
    }

    FILE *fp_out = NULL;
    if (argc < 3 || strcmp(argv[2], "-") == 0) {
        fp_out = stdout;
    } else {
        fp_out = fopen(argv[2], "w+");
        if (!fp_out) {
            perror("Open output file");
            fclose(fp_in);
            return 1;
        }
    }

    // Read the input in 1 KiB steps. The loop ends on a short read, which
    // covers both end-of-file and read errors; looping on !feof() alone would
    // spin forever after an error.
    const size_t chunk_size = 1024;
    char *buffer = NULL;
    unsigned int buffer_length = 0;
    bool failed = false;
    while (true) {
        char *grown = (char *) realloc(buffer, buffer_length + chunk_size);
        if (!grown) {
            perror("Allocate input buffer");
            failed = true;
            break;
        }
        buffer = grown;

        const size_t got = fread(buffer + buffer_length, sizeof(char), chunk_size, fp_in);
        buffer_length += got;
        if (got < chunk_size)
            break;
    }
    if (!failed && ferror(fp_in)) {
        perror("Read input file");
        failed = true;
    }
    fclose(fp_in);

    if (failed) {
        free(buffer);
        if (fp_out != stdout)
            fclose(fp_out);
        return 1;
    }

    // dup == false: optimize_html() takes over `buffer`; the returned string
    // is what has to be released afterwards.
    t_str s = optimize_html(buffer, buffer_length, FLAGS_DEBUG | FLAGS_CLEAR_SPACES, false);

    fwrite(s.value, sizeof(char), s.length, fp_out);
    fclose(fp_out);

    free(s.value);
    return 0;
}
#endif