/*
 * Excerpt: only scattered statements of the surrounding code are visible
 * here, so the notes below describe just what these lines show.
 *
 * HEXA_CHARS is the lowercase hexadecimal digit set.  The guard that follows
 * it on the same line is taken for a NULL buffer, a zero size, or a string
 * longer than `size` (the action taken is outside this excerpt).
 */
33 #define HEXA_CHARS "0123456789abcdef" 80 if ((buf == NULL) || (size == 0) || (strlen(buf) > size))
/* Allocate `sz` bytes into `new` and test for allocation failure; `sz` is
 * computed outside this excerpt. */
88 if ((
new = (
char *) malloc(sz)) == NULL)
/* p starts at the newly allocated buffer and q at buf.  The loop ends at the
 * NUL terminator under p or once `size`, decremented on every pass, hits 0. */
95 for (p =
new, q = buf; (size > 0) && (*p !=
'\0'); size--, p++)
/* Taken when either of the two characters after p fails is_hexa_char().  The
 * || short-circuits, so p[2] is only examined when p[1] was accepted. */
104 if (!is_hexa_char(p[1]) || !is_hexa_char(p[2]))
/* Store the value returned by hexa2char(p[1], p[2]) through q, then advance
 * q by one. */
109 *q++ = hexa2char(p[1], p[2]);
/*
 * In-place scan: p (read) and q (write) both start at buf and the loop runs
 * to the NUL terminator.
 * NOTE(review): the enclosing function header is outside this excerpt;
 * possibly convert_html_codes() - confirm.
 */
136 for (p = q = buf; *p !=
'\0'; p++)
/*
 * Accept p[1] and p[2] only if their lower-cased form occurs in the digit
 * literal.
 * NOTE(review): strchr() also matches the terminating NUL of the literal, so
 * a '\0' in p[1] passes this test and p[2] is then read - confirm the string
 * cannot end right after p[0].
 * NOTE(review): the literal lists '0' twice ("...890abc..."); harmless for
 * strchr(), but it is not the same text as HEXA_CHARS.
 * NOTE(review): tolower() receives a plain char here; the <ctype.h>
 * functions expect an unsigned char value or EOF.
 */
140 if (
strchr(
"01234567890abcdef", tolower(p[1])) &&
141 strchr(
"01234567890abcdef", tolower(p[2])))
/*
 * NOTE(review): this adds the raw character codes of p[1] (shifted) and
 * p[2], not their hexadecimal digit values - for the pair "41" it computes
 * ('4' << 4) + '1'.  Presumably not the intended byte; confirm.
 */
143 *q++ = (p[1] << 4) + p[2];
/* Detect the two-character prefix "&#" at the read position. */
148 if (p[0] ==
'&' && p[1] ==
'#')
/* Locals: p receives the heap buffer allocated below; s and t are assigned
 * by code outside this excerpt. */
167 char *p = NULL, *s, *t;
/* Guard taken for a NULL buffer or a zero size (the action is outside this
 * excerpt). */
172 if ((buf == NULL) || (size == 0))
/* Allocate `sz` bytes and test for failure; `sz` is computed outside this
 * excerpt. */
177 if ((p = (
char *) malloc(sz)) == NULL)
/* NOTE(review): the allocation size is `sz` but size + 1 bytes are cleared
 * here - confirm sz >= size + 1. */
183 memset(p, 0, size + 1);
/* Visit at most `size` characters through t, stopping early at the NUL
 * terminator. */
187 for (i = 0; (i < size) && (*t !=
'\0'); i++, t++)
/* Advance q past the leading run of non-digit characters: q ends on the
 * first decimal digit, or on the terminator when there is none. */
220 q += strcspn(q,
"0123456789");
/*
 * Regular-expression patterns for accepted HTML tags, several tag names
 * grouped per pattern.  Patterns starting with "</?" accept both the opening
 * and the closing form; the (meta|param) pattern accepts the opening form
 * only.  The end of the initializer is outside this excerpt.
 *
 * NOTE(review): a second definition with the same name appears further down
 * (one tag per pattern); presumably the two are selected by a preprocessor
 * conditional that is not visible - confirm.
 * NOTE(review): after the tag name the space is optional and [^>]* accepts
 * anything, so unless the matcher enforces a word boundary, the alternative
 * "a" also matches any tag whose name merely starts with 'a' (same for
 * "b", "i", "p", "q", "s", "u", ...) - confirm how zeStrRegex() applies
 * these patterns.
 * NOTE(review): "td" is listed both at the end of one group and at the
 * start of the next.
 * NOTE(review): in the comment pattern, [--] is a bracket expression, so
 * [--]? matches at most one '-', not the two-character sequence "--"; and
 * [^>]* means a comment containing '>' is not matched as a whole.
 */
258 static char *VALID_HTML_TAGS[] = {
259 "<!doctype html[^>]*>",
260 "</?(a|abbr|acronym|address|applet|b|bdo|big|blockquote|body) ?[^>]*>",
261 "</?(button|caption|center|cite|code|colgroup|dd|del|dfn|dir|div) ?[^>]*>",
262 "</?(dl|dt|em|fieldset|font|form|frameset|h[1-6]|head|html|i) ?[^>]*>",
263 "</?(iframe|ins|kbd|label|legend|li|map|menu|nobr|noframes|noscript) ?[^>]*>",
264 "</?(object|ol|optgroup|option|p|pre|q|s|samp|script|select) ?[^>]*>",
265 "</?(small|span|strike|strong|style|sub|sup|table|tbody|td) ?[^>]*>",
266 "</?(td|textarea|tfoot|th|thead|title|tr|tt|u|ul|var) ?[^>]*>",
268 "</?(area|base|basefont|br|col|frame|hr|img|input|isindex|link) ?[^>]*>",
269 "<(meta|param) ?[^>]*>",
/* Comment and XML-declaration style patterns. */
271 "<!--[^>]*[--]?[ ]*>",
272 "<(html)?[ ]*[?]?xml.*[ :]?[^>]*>",
273 "<[/?]?xml.*[ :]?[^>]*>",
/*
 * Regular-expression patterns for accepted HTML tags, one tag name per
 * pattern.  Patterns starting with "</?" accept both the opening and the
 * closing form; those starting with "<" directly (area, base, basefont, br,
 * col, frame, hr, img, input, isindex, link, meta, param) accept the opening
 * form only.  Entries after "textarea" and the end of the initializer are
 * outside this excerpt.
 *
 * NOTE(review): this is a second definition of the same identifier as the
 * grouped table above it; presumably the two are selected by a preprocessor
 * conditional that is not visible - confirm.  The grouped table uses "</?"
 * for area, base, br, ... as well, so the two tables do not accept exactly
 * the same input.
 * NOTE(review): after the tag name the space is optional and [^>]* accepts
 * anything, so unless the matcher enforces a word boundary, the pattern for
 * "a" also matches any tag whose name merely starts with 'a' (same for
 * "b", "i", "p", "q", "s", ...) - confirm how zeStrRegex() applies these
 * patterns.
 */
281 static char *VALID_HTML_TAGS[] = {
282 "<!doctype html[^>]*>",
"</?a ?[^>]*>",
"</?abbr ?[^>]*>",
283 "</?acronym ?[^>]*>",
"</?address ?[^>]*>",
"</?applet ?[^>]*>",
284 "<area ?[^>]*>",
"</?b ?[^>]*>",
"<base ?[^>]*>",
285 "<basefont ?[^>]*>",
"</?bdo ?[^>]*>",
"</?big ?[^>]*>",
286 "</?blockquote ?[^>]*>",
"</?body ?[^>]*>",
"<br ?[^>]*>",
287 "</?button ?[^>]*>",
"</?caption ?[^>]*>",
"</?center ?[^>]*>",
288 "</?cite ?[^>]*>",
"</?code ?[^>]*>",
"<col ?[^>]*>",
289 "</?colgroup ?[^>]*>",
"</?dd ?[^>]*>",
"</?del ?[^>]*>",
290 "</?dfn ?[^>]*>",
"</?dir ?[^>]*>",
"</?div ?[^>]*>",
291 "</?dl ?[^>]*>",
"</?dt ?[^>]*>",
"</?em ?[^>]*>",
292 "</?fieldset ?[^>]*>",
"</?font ?[^>]*>",
"</?form ?[^>]*>",
293 "<frame ?[^>]*>",
"</?frameset ?[^>]*>",
"</?h[1-6] ?[^>]*>",
294 "</?head ?[^>]*>",
"<hr ?[^>]*>",
"</?html ?[^>]*>",
295 "</?i ?[^>]*>",
"</?iframe ?[^>]*>",
"<img ?[^>]*>",
296 "<input ?[^>]*>",
"</?ins ?[^>]*>",
"<isindex ?[^>]*>",
297 "</?kbd ?[^>]*>",
"</?label ?[^>]*>",
"</?legend ?[^>]*>",
298 "</?li ?[^>]*>",
"<link ?[^>]*>",
"</?map ?[^>]*>",
299 "</?menu ?[^>]*>",
"<meta ?[^>]*>",
"</?nobr ?[^>]*>",
"</?noframes ?[^>]*>",
300 "</?noscript ?[^>]*>",
"</?object ?[^>]*>",
"</?ol ?[^>]*>",
301 "</?optgroup ?[^>]*>",
"</?option ?[^>]*>",
"</?p ?[^>]*>",
302 "<param ?[^>]*>",
"</?pre ?[^>]*>",
"</?q ?[^>]*>",
303 "</?s ?[^>]*>",
"</?samp ?[^>]*>",
"</?script ?[^>]*>",
304 "</?select ?[^>]*>",
"</?small ?[^>]*>",
"</?span ?[^>]*>",
305 "</?strike ?[^>]*>",
"</?strong ?[^>]*>",
"</?style ?[^>]*>",
306 "</?sub ?[^>]*>",
"</?sup ?[^>]*>",
"</?table ?[^>]*>",
307 "</?tbody ?[^>]*>",
"</?td ?[^>]*>",
"</?textarea ?[^>]*>",
/*
 * Comment and XML-declaration style patterns.
 * NOTE(review): [--] is a bracket expression, so [--]? matches at most one
 * '-', not the two-character sequence "--".
 */
317 "<!--[^>]*[--]?[ ]*>",
319 "<[/?]?xml.*[ :]?[^>]*>",
/* Guard taken for a NULL or empty buffer (the action is outside this
 * excerpt). */
343 if ((buf == NULL) || (strlen(buf) == 0))
/* Remember whether buf matches the XML-declaration style pattern; the two
 * position outputs of zeStrRegex() are not requested (NULL, NULL). */
348 xmlbuf =
zeStrRegex(buf,
"<(html)?[ ]*[?]?xml.*[ :]?[^>]*>", NULL, NULL,
TRUE);
/* Loop while text remains at p.
 * NOTE(review): strlen(p) > 0 walks the whole remainder on every pass; it is
 * true exactly when *p != '\0'. */
352 while (strlen(p) > 0)
/* Only handle a span that fits in rbuf.
 * NOTE(review): pi and pf are printed with %ld below, so they are presumably
 * long; the comparison against sizeof converts the difference to an unsigned
 * type, so a negative difference would compare as a huge value and fail the
 * test - confirm that is the intent. */
361 if ((pf - pi) <
sizeof (rbuf))
/* Copy the span [pi, pf) of p into rbuf.
 * NOTE(review): strncpy() does not write a terminator when the source has at
 * least `len` characters; rbuf must be terminated or pre-cleared by code
 * outside this excerpt - confirm. */
365 size_t len = pf - pi;
367 strncpy(rbuf, p + pi, len);
/* Second pass over rbuf with a read cursor u and a cursor v that starts at
 * the same place; the test singles out characters that are neither '\n' nor
 * '\r' (what is done with them is outside this excerpt). */
373 for (u = v = rbuf; *u !=
'\0'; u++)
375 if ((*u !=
'\n') && (*u !=
'\r'))
/* Level-19 trace of the candidate tag text and its offsets; the remaining
 * arguments of this call are outside this excerpt. */
381 ZE_MessageInfo(19,
"%s SPAMCHECK : Checking : %s, %ld %ld",
id, rbuf, pi,
/* Try each VALID_HTML_TAGS pattern until the NULL entry is reached or `ok`
 * becomes true. */
384 for (s = VALID_HTML_TAGS; (*s != NULL) && !ok; s++)
/* When nothing matched and the buffer looked like XML, fall back to the
 * VALID_XML_TAGS patterns.  The leading `1 &&` is a constant that does not
 * affect the condition. */
388 if (1 && !ok && xmlbuf)
390 for (s = VALID_XML_TAGS; (*s != NULL) && !ok; s++)
/* Level-10 report of a tag that matched no pattern. */
398 ZE_MessageInfo(10,
"%s SPAM CHECK - NOT VALID HTML TAG : %s",
id, rbuf);
/* Format string of a further message; the call it belongs to is outside
 * this excerpt. */
402 "%s SPAM CHECK - NOT VALID HTML TAG : more than 10 already found ! ",
429 {
"<", 0,
'<',
'<'},
430 {
">", 0,
'>',
'>'},
431 {
"&", 0,
'#',
'#'},
432 {
""", 0,
'"',
'"'},
434 {
" ", 160,
' ',
' '},
435 {
"¡", 161,
' ',
'!'},
436 {
"¢", 162,
' ',
' '},
437 {
"£", 163,
' ',
' '},
438 {
"¤", 164,
' ',
' '},
439 {
"¥", 165,
' ',
' '},
440 {
"¦", 166,
'|',
'|'},
441 {
"§", 167,
' ',
' '},
442 {
"¨", 168,
' ',
' '},
443 {
"©", 169,
' ',
' '},
444 {
"ª", 170,
' ',
' '},
445 {
"«", 171,
' ',
' '},
446 {
"¬", 172,
' ',
' '},
447 {
"­", 173,
' ',
' '},
448 {
"®", 174,
' ',
' '},
449 {
"¯", 175,
' ',
' '},
450 {
"°", 176,
' ',
' '},
451 {
"±", 177,
' ',
'+'},
452 {
"²", 178,
'²',
' '},
453 {
"³", 179,
' ',
' '},
454 {
"´", 180,
'^',
'^'},
455 {
"µ", 181,
' ',
' '},
456 {
"¶", 182,
' ',
' '},
457 {
"·", 183,
' ',
' '},
458 {
"¸", 184,
'ç',
'c'},
459 {
"¹", 185,
' ',
' '},
460 {
"º", 186,
' ',
' '},
461 {
"»", 187,
' ',
' '},
462 {
"¼", 188,
' ',
' '},
463 {
"½", 189,
' ',
' '},
464 {
"¾", 190,
' ',
' '},
465 {
"¿", 191,
' ',
' '},
466 {
"À", 192,
' ',
'a'},
467 {
"Á", 193,
' ',
'a'},
468 {
"Â", 194,
' ',
'a'},
469 {
"Ã", 195,
' ',
'a'},
470 {
"Ä", 196,
' ',
'a'},
471 {
"Å", 197,
' ',
'a'},
472 {
"Æ", 198,
' ',
'a'},
473 {
"Ç", 199,
' ',
'c'},
474 {
"È", 200,
' ',
'e'},
475 {
"É", 201,
' ',
'e'},
476 {
"Ê", 202,
' ',
'e'},
477 {
"Ë", 203,
' ',
'e'},
478 {
"Ì", 204,
' ',
'i'},
479 {
"Í", 205,
' ',
'i'},
480 {
"Î", 206,
' ',
'i'},
481 {
"Ï", 207,
' ',
'i'},
482 {
"Ð", 208,
' ',
' '},
483 {
"Ñ", 209,
' ',
'n'},
484 {
"Ò", 210,
' ',
'o'},
485 {
"Ó", 211,
' ',
'o'},
486 {
"Ô", 212,
' ',
'o'},
487 {
"Õ", 213,
' ',
'o'},
488 {
"Ö", 214,
' ',
'o'},
489 {
"×", 215,
' ',
'x'},
490 {
"Ø", 216,
' ',
'o'},
491 {
"Ù", 217,
' ',
'u'},
492 {
"Ú", 218,
' ',
'u'},
493 {
"Û", 219,
' ',
'u'},
494 {
"Ü", 220,
' ',
'u'},
495 {
"Ý", 221,
' ',
'y'},
496 {
"Þ", 222,
' ',
' '},
497 {
"ß", 223,
' ',
's'},
498 {
"à", 224,
' ',
'a'},
499 {
"á", 225,
' ',
'a'},
500 {
"â", 226,
' ',
'a'},
501 {
"ã", 227,
' ',
'a'},
502 {
"ä", 228,
' ',
'a'},
503 {
"å", 229,
' ',
'a'},
504 {
"æ", 230,
' ',
'a'},
505 {
"ç", 231,
' ',
'c'},
506 {
"è", 232,
' ',
'e'},
507 {
"é", 233,
' ',
'e'},
508 {
"ê", 234,
' ',
'e'},
509 {
"ë", 235,
' ',
'e'},
510 {
"ì", 236,
' ',
'i'},
511 {
"í", 237,
' ',
'i'},
512 {
"î", 238,
' ',
'i'},
513 {
"ï", 239,
' ',
'i'},
514 {
"ð", 240,
' ',
' '},
515 {
"ñ", 241,
' ',
'n'},
516 {
"ò", 242,
' ',
'o'},
517 {
"ó", 243,
' ',
'o'},
518 {
"ô", 244,
' ',
'o'},
519 {
"õ", 245,
' ',
'o'},
520 {
"ö", 246,
' ',
'o'},
521 {
"÷", 247,
' ',
'/'},
522 {
"ø", 248,
' ',
'o'},
523 {
"ù", 249,
' ',
'u'},
524 {
"ú", 250,
' ',
'u'},
525 {
"û", 251,
' ',
'u'},
526 {
"ü", 252,
' ',
'u'},
527 {
"ý", 253,
' ',
'y'},
528 {
"þ", 254,
' ',
' '},
529 {
"ÿ", 255,
' ',
'y'},
531 {NULL, 0,
'\0',
'\0'}
/* Walk VALID_ENTITIES up to the entry whose name is NULL. */
540 for (p = VALID_ENTITIES; p->
name != NULL; p++)
/*
 * Case-insensitive comparison of s against the entry name, limited to the
 * length of the entry name - i.e. the entry matches when s starts with it.
 * NOTE(review): because this is a prefix test, an entry whose name is a
 * prefix of a longer entry's name wins if it comes first in the table -
 * confirm the table order makes that harmless.
 */
542 if (strncasecmp(p->
name, s, strlen(p->
name)) == 0)
char * convert_html_codes(char *buf)
struct regex_tag_T regex_tag_T
#define ZE_LogMsgError(level,...)
bool zeStrRegex(char *, char *, long *, long *, bool)
#define ZE_MessageInfo(level,...)
#define ZE_LogSysError(...)