| 1 |
/*
 * simple_regex.c
 *
 * Simple regex library.
 *
 * Copyright 1999-2008 Gentoo Foundation
 * Copyright 2004-2007 Martin Schlemmer <azarah@nosferatu.za.org>
 * Licensed under the GPL-2
 */
|
| 10 |
|
| 11 |
/*
 * Some notes:
 *
 * - This is a very simple regex library (read: return a match if some string
 *   matches some regex).  It is probably not POSIX (if there is a POSIX or
 *   other standard) compatible.
 *
 * - I primarily wrote it to _not_ use glibc type regex functions, in case we
 *   might want to use it in code that has to be linked against klibc, etc.
 *
 * - It really is not optimized in any way yet.
 *
 * - Supported operators are:
 *
 *     '.', '?', '*', '+'  - So called 'wildcards'
 *     '[a-z]', '[^a-z]'   - Basic 'lists'.  Note that 'a-z' just specifies
 *                           that it supports basic lists as well as
 *                           sequences ..
 *                           The '^' is for an inverted list of course.
 *     '^', '$'            - The 'from start' and 'to end' operators.  If
 *                           these are not used at the start ('^') or end
 *                           ('$') of the regex, they will be treated as
 *                           normal characters (this of course excludes the
 *                           use of '^' in a 'list').
 *
 * - If an invalid argument was passed, the functions return 0 with
 *   'regex_data->match == 0' (no error with no match) rather than -1.  It may
 *   not be consistent with other practices, but I personally do not feel it is
 *   a critical error for these types of functions, and there is debugging you
 *   can enable to verify that there are no such issues.
 *
 * - __somefunction() is usually a helper function for somefunction().  I guess
 *   recursion might be an alternative, but I try to avoid it.
 *
 * - In general if we are matching a 'wildcard' ('*', '+' or '?'), a 'word'
 *   (read: some part of the regex that does not contain a 'wildcard' or
 *   'list') will have a greater 'weight' than the 'wildcard'.  This means that
 *   we will only continue to evaluate the 'wildcard' until the following
 *   'word' (if any) matches.  Currently this does not hold true for a 'list'
 *   not followed by a 'wildcard' - I might fix this in future.
 *
 */
|
| 51 |
|
| 52 |
#include "headers.h"
|
| 53 |
#include "rcscripts/rcutil.h"
|
| 54 |
|
| 55 |
/*
 * Macro to check if a regex_data_t pointer is valid.
 *
 * Calls rc_errno_set (EINVAL), emits a debug message and jumps to the label
 * '_on_error' when the pointer itself, its 'data' member or its 'regex'
 * member is NULL, or when 'regex' is an empty string.
 *
 * An empty 'data' string is deliberately NOT rejected, as it might still
 * provide a match ('*' or '?' wildcard).
 *
 * '_regex_data' is parenthesized in the expansion so that any pointer
 * expression (for example '&some_struct') can be passed.  It is evaluated
 * more than once, so it must not have side effects.
 */
#define CHECK_REGEX_DATA_P(_regex_data, _on_error) \
    do { \
        if ((NULL == (_regex_data)) \
            || (NULL == (_regex_data)->data) \
            || (NULL == (_regex_data)->regex) \
            || ('\0' == (_regex_data)->regex[0])) \
        { \
            rc_errno_set (EINVAL); \
            DBG_MSG ("Invalid argument passed!\n"); \
            goto _on_error; \
        } \
    } while (0)
|
| 71 |
|
| 72 |
static size_t get_word (const char *regex, char **r_word);
|
| 73 |
static int match_word (regex_data_t * regex_data);
|
| 74 |
static size_t get_list_size (const char *regex);
|
| 75 |
static size_t get_list (const char *regex, char **r_list);
|
| 76 |
static int __match_list (regex_data_t * regex_data);
|
| 77 |
static int match_list (regex_data_t * regex_data);
|
| 78 |
static size_t get_wildcard (const char *regex, char *r_wildcard);
|
| 79 |
static int __match_wildcard (regex_data_t * regex_data,
|
| 80 |
int (*match_func) (regex_data_t * regex_data),
|
| 81 |
const char *regex);
|
| 82 |
static int match_wildcard (regex_data_t * regex_data);
|
| 83 |
static int __match (regex_data_t * regex_data);
|
| 84 |
|
| 85 |
/*
 * Return values for match_* functions
 *
 *  0   - There was no error.  If there was a match, regex_data->match
 *        will be > 0 (this is the definitive check - if not true, the
 *        other values of the struct may be bogus), regex_data->count
 *        will be the amount of data that was matched (might be 0 for
 *        some wildcards), and regex_data->r_count will be > 0.
 *
 *  -1  - An error occurred.  Check errno for more info.
 *
 */
|
| 97 |
|
| 98 |
/*
 * Copy the leading 'word' of 'regex' into a newly allocated string.
 *
 * A 'word' is the run of plain characters at the start of 'regex' (a '.'
 * counts as a plain character here).  It ends
 *   - in front of the character that precedes a wildcard ('*', '+', '?'),
 *     because that character belongs to the wildcard, or
 *   - in front of a complete list ('[...]').  A '[' that does not start a
 *     complete list is copied like any other character.
 *
 * regex   - the regex to scan; rejected by check_arg_str() when unusable.
 * r_word  - receives the allocated, NUL terminated word.  The caller owns
 *           the buffer and has to free() it.  It is set to NULL on error.
 *
 * Returns the length of the word (0 if there is none, if an argument was
 * invalid or if the allocation failed), or (size_t) -1 if get_list()
 * reported an error.
 */
size_t
get_word (const char *regex, char **r_word)
{
    char *r_list = NULL;
    char *str_ptr;
    size_t count = 0;
    size_t tmp_count;

    if (!check_arg_str (regex))
        return 0;

    if (NULL == r_word)
        return 0;

    *r_word = xmalloc (strlen (regex) + 1);
    /* Check the allocation itself, not the address of the out parameter */
    if (NULL == *r_word)
        return 0;

    str_ptr = *r_word;

    while ('\0' != regex[0])
    {
        switch (regex[0])
        {
        case '*':
        case '+':
        case '?':
            /* If it's a wildcard, back up one step: the previous character
             * is the wildcard's operand.  When the regex starts with the
             * wildcard there is nothing to back up over, and stepping back
             * anyway would write in front of the buffer. */
            if (count > 0)
            {
                str_ptr--;
                count--;
            }
            str_ptr[0] = '\0';
            return count;
        case '[':
            r_list = NULL;
            tmp_count = get_list (regex, &r_list);
            /* In theory should not happen, but you never know
             * what may happen in future ... */
            if ((size_t) -1 == tmp_count)
                goto error;

            /* Bail if we have a list.  Only a positive count means that
             * get_list() handed us a buffer; for 0 it either never
             * allocated one or already released it, so it must not be
             * freed here. */
            if (tmp_count > 0)
            {
                free (r_list);
                str_ptr[0] = '\0';
                return count;
            }
            /* Not a complete list: treat '[' as a normal character */
            /* fall through */
        default:
            *str_ptr++ = *regex++;
            count++;
            break;
        }
    }

    str_ptr[0] = '\0';

    return count;

error:
    free (*r_word);
    /* Do not leave a dangling pointer behind for the caller to free again */
    *r_word = NULL;

    return (size_t) -1;
}
|
| 156 |
|
| 157 |
int
|
| 158 |
match_word (regex_data_t * regex_data)
|
| 159 |
{
|
| 160 |
char *data_p = regex_data->data;
|
| 161 |
char *r_word = NULL, *r_word_p;
|
| 162 |
size_t count = 0;
|
| 163 |
|
| 164 |
CHECK_REGEX_DATA_P (regex_data, exit);
|
| 165 |
|
| 166 |
count = get_word (regex_data->regex, &r_word);
|
| 167 |
if (-1 == count)
|
| 168 |
goto error;
|
| 169 |
if (0 == count)
|
| 170 |
goto exit;
|
| 171 |
r_word_p = r_word;
|
| 172 |
|
| 173 |
while ((strlen (data_p) > 0) && (strlen (r_word_p) > 0))
|
| 174 |
{
|
| 175 |
/* If 'r_word' is not 100% part of 'string', we do not have
|
| 176 |
* a match. If its a '.', it matches no matter what. */
|
| 177 |
if ((data_p[0] != r_word_p[0]) && ('.' != r_word_p[0]))
|
| 178 |
{
|
| 179 |
count = 0;
|
| 180 |
goto exit;
|
| 181 |
}
|
| 182 |
|
| 183 |
data_p++;
|
| 184 |
r_word_p++;
|
| 185 |
}
|
| 186 |
|
| 187 |
/* If 'string' is shorter than 'r_word', we do not have a match */
|
| 188 |
if ((0 == strlen (data_p)) && (0 < strlen (r_word_p)))
|
| 189 |
{
|
| 190 |
count = 0;
|
| 191 |
goto exit;
|
| 192 |
}
|
| 193 |
|
| 194 |
exit:
|
| 195 |
/* Fill in our structure */
|
| 196 |
if (0 == count)
|
| 197 |
regex_data->match = REGEX_NO_MATCH;
|
| 198 |
else if (strlen (regex_data->data) == count)
|
| 199 |
regex_data->match = REGEX_FULL_MATCH;
|
| 200 |
else
|
| 201 |
regex_data->match = REGEX_PARTIAL_MATCH;
|
| 202 |
if (regex_data->match != REGEX_NO_MATCH)
|
| 203 |
regex_data->where = regex_data->data;
|
| 204 |
else
|
| 205 |
regex_data->where = NULL;
|
| 206 |
regex_data->count = count;
|
| 207 |
regex_data->r_count = count;
|
| 208 |
|
| 209 |
free (r_word);
|
| 210 |
return 0;
|
| 211 |
|
| 212 |
error:
|
| 213 |
regex_data->match = REGEX_NO_MATCH;
|
| 214 |
|
| 215 |
free (r_word);
|
| 216 |
return -1;
|
| 217 |
}
|
| 218 |
|
| 219 |
/*
 * Count how many characters the list at the start of 'regex' expands to.
 *
 * 'regex' has to start with '['.  Scanning stops at the first ']' or at the
 * end of the string.  Every plain character counts as one; an ascending
 * sequence 'x-y' adds the distance between 'x' and 'y' on top of the 'x'
 * that was already counted.
 *
 * Returns the number of characters, or 0 if 'regex' was rejected by
 * check_arg_str() or does not start with '['.
 */
size_t
get_list_size (const char *regex)
{
    const char *pos;
    size_t total = 0;

    if (!check_arg_str (regex))
        return 0;

    if ('[' != regex[0])
    {
        rc_errno_set (EINVAL);
        DBG_MSG ("Invalid argument passed!\n");
        return 0;
    }

    /* Start right behind the '[' */
    pos = regex + 1;

    while (('\0' != pos[0]) && (']' != pos[0]))
    {
        /* A sequence (x-y) needs a '-' that is followed by something other
         * than ']' or the end of the string, with x smaller than y */
        int is_sequence = ('-' == pos[0])
                          && (']' != pos[1])
                          && ('\0' != pos[1])
                          && (pos[-1] < pos[1]);

        if (!is_sequence)
        {
            pos++;
            total++;
            continue;
        }

        /* Add the difference in the sequence ('x' itself is counted) */
        total += pos[1] - pos[-1];
        /* Skip the '-' and the end of the sequence */
        pos += 2;
    }

    return total;
}
|
| 257 |
|
| 258 |
/*
 * Expand the list ('[...]') at the start of 'regex' into a plain string.
 *
 * Sequences such as 'a-e' are expanded to 'abcde'.  A leading '^' is copied
 * like any other character (__match_list() interprets it).  A '-' that is
 * the first element of the list has nothing in front of it to start a
 * sequence from, so it is taken literally.
 *
 * regex   - the regex to scan.
 * r_list  - receives the allocated, NUL terminated expansion when a
 *           positive count is returned; the caller then owns the buffer and
 *           has to free() it.  In every other case it is set to NULL.
 *
 * Returns the number of regex characters the list occupies, including the
 * '[' and ']'; 0 if 'regex' does not start with a complete, non-empty list
 * (or an argument was invalid); (size_t) -1 if the allocation failed.
 */
size_t
get_list (const char *regex, char **r_list)
{
    char *buf = NULL;
    size_t count = 0;
    size_t size;

    if (NULL == r_list)
        return 0;

    /* Never leave the caller with an indeterminate pointer - callers
     * free() whatever they find here */
    *r_list = NULL;

    if (!check_arg_str (regex))
        return 0;

    /* Bail if we do not have a list.  Do not add debugging, as
     * it is very noisy (used a lot when we call match_list() in
     * __match() and match() to test for list matching) */
    if ('[' != regex[0])
        return 0;

    size = get_list_size (regex);
    if (0 == size)
    {
        /* Should not be an issue, but just in case */
        DBG_MSG ("0 returned by get_list_size.\n");
        return 0;
    }

    /* One byte for the terminator, plus one spare byte: get_list_size()
     * counts a leading '-' as the start of a sequence from '[', which can
     * come out one character short of the literal '-' stored below */
    *r_list = xmalloc (size + 2);
    if (NULL == *r_list)
        return (size_t) -1;

    buf = *r_list;

    /* Take care of '[' */
    regex++;
    count++;

    while (('\0' != regex[0]) && (']' != regex[0]))
    {
        /* We have a sequence (x-y).  'buf > *r_list' makes sure there is a
         * previous character to start from, so buf[-1] never reads in
         * front of the allocation. */
        if (('-' == regex[0])
            && (buf > *r_list)
            && (']' != regex[1])
            && ('\0' != regex[1])
            && (buf[-1] < regex[1]))
        {
            /* Fill in missing chars in sequence */
            while (buf[-1] < regex[1])
            {
                buf[0] = (char) (buf[-1] + 1);
                buf++;
                /* We do not increase count */
            }
            /* Take care of '-' and next char */
            count += 2;
            regex += 2;
        }
        else
        {
            *buf++ = *regex++;
            count++;
        }
    }

    buf[0] = '\0';
    /* Take care of ']' */
    count++;

    /* We do not have a list as it does not end in ']' */
    if (']' != regex[0])
    {
        count = 0;
        free (*r_list);
        /* The caller must not see (and free) the released buffer */
        *r_list = NULL;
    }

    return count;
}
|
| 330 |
|
| 331 |
/* If the first is the '^' character, everything but the list is matched
|
| 332 |
* NOTE: We only evaluate _ONE_ data character at a time!! */
|
| 333 |
int
|
| 334 |
__match_list (regex_data_t * regex_data)
|
| 335 |
{
|
| 336 |
regex_data_t tmp_data;
|
| 337 |
char *data_p = regex_data->data;
|
| 338 |
char *list_p = regex_data->regex;
|
| 339 |
char test_regex[2] = { '\0', '\0' };
|
| 340 |
int invert = 0;
|
| 341 |
int lmatch;
|
| 342 |
int retval;
|
| 343 |
|
| 344 |
CHECK_REGEX_DATA_P (regex_data, failed);
|
| 345 |
|
| 346 |
if ('^' == list_p[0])
|
| 347 |
{
|
| 348 |
/* We need to invert the match */
|
| 349 |
invert = 1;
|
| 350 |
/* Make sure '^' is not part of our list */
|
| 351 |
list_p++;
|
| 352 |
}
|
| 353 |
|
| 354 |
if (invert)
|
| 355 |
/* All should be a match if not in the list */
|
| 356 |
lmatch = 1;
|
| 357 |
else
|
| 358 |
/* We only have a match if in the list */
|
| 359 |
lmatch = 0;
|
| 360 |
|
| 361 |
while (strlen (list_p) > 0)
|
| 362 |
{
|
| 363 |
test_regex[0] = list_p[0];
|
| 364 |
|
| 365 |
FILL_REGEX_DATA (tmp_data, data_p, test_regex);
|
| 366 |
retval = match_word (&tmp_data);
|
| 367 |
if (-1 == retval)
|
| 368 |
goto error;
|
| 369 |
|
| 370 |
if (REGEX_MATCH (tmp_data))
|
| 371 |
{
|
| 372 |
if (invert)
|
| 373 |
/* If we exclude the list from
|
| 374 |
* characters we try to match, we
|
| 375 |
* have a match until one of the
|
| 376 |
* list is found. */
|
| 377 |
lmatch = 0;
|
| 378 |
else
|
| 379 |
/* If not, we have to keep looking
|
| 380 |
* until one from the list match
|
| 381 |
* before we have a match */
|
| 382 |
lmatch = 1;
|
| 383 |
break;
|
| 384 |
}
|
| 385 |
list_p++;
|
| 386 |
}
|
| 387 |
|
| 388 |
/* Fill in our structure */
|
| 389 |
if (lmatch)
|
| 390 |
{
|
| 391 |
regex_data->match = REGEX_PARTIAL_MATCH;
|
| 392 |
regex_data->where = regex_data->data;
|
| 393 |
regex_data->count = 1;
|
| 394 |
/* This one is more cosmetic, as match_list() will
|
| 395 |
* do the right thing */
|
| 396 |
regex_data->r_count = 0; /* strlen(regex_data->regex); */
|
| 397 |
}
|
| 398 |
else
|
| 399 |
{
|
| 400 |
failed:
|
| 401 |
regex_data->match = REGEX_NO_MATCH;
|
| 402 |
regex_data->where = NULL;
|
| 403 |
regex_data->count = 0;
|
| 404 |
regex_data->r_count = 0;
|
| 405 |
}
|
| 406 |
|
| 407 |
return 0;
|
| 408 |
|
| 409 |
error:
|
| 410 |
regex_data->match = REGEX_NO_MATCH;
|
| 411 |
|
| 412 |
return -1;
|
| 413 |
}
|
| 414 |
|
| 415 |
int
|
| 416 |
match_list (regex_data_t * regex_data)
|
| 417 |
{
|
| 418 |
regex_data_t tmp_data;
|
| 419 |
char *data_p = regex_data->data;
|
| 420 |
char *list_p = regex_data->regex;
|
| 421 |
char *r_list = NULL;
|
| 422 |
size_t r_count = 0;
|
| 423 |
int retval;
|
| 424 |
|
| 425 |
CHECK_REGEX_DATA_P (regex_data, failed);
|
| 426 |
|
| 427 |
r_count = get_list (list_p, &r_list);
|
| 428 |
if (-1 == r_count)
|
| 429 |
goto error;
|
| 430 |
if (0 == r_count)
|
| 431 |
goto failed;
|
| 432 |
|
| 433 |
FILL_REGEX_DATA (tmp_data, data_p, &list_p[r_count - 1]);
|
| 434 |
retval = __match_wildcard (&tmp_data, __match_list, r_list);
|
| 435 |
if (-1 == retval)
|
| 436 |
goto error;
|
| 437 |
if (REGEX_MATCH (tmp_data))
|
| 438 |
{
|
| 439 |
/* This should be 2 ('word' + 'wildcard'), so just remove
|
| 440 |
* the wildcard */
|
| 441 |
tmp_data.r_count--;
|
| 442 |
goto exit;
|
| 443 |
}
|
| 444 |
|
| 445 |
FILL_REGEX_DATA (tmp_data, data_p, r_list);
|
| 446 |
retval = __match_list (&tmp_data);
|
| 447 |
if (-1 == retval)
|
| 448 |
goto error;
|
| 449 |
if (REGEX_MATCH (tmp_data))
|
| 450 |
goto exit;
|
| 451 |
|
| 452 |
failed:
|
| 453 |
/* We will fill in regex_data below */
|
| 454 |
tmp_data.match = REGEX_NO_MATCH;
|
| 455 |
tmp_data.where = NULL;
|
| 456 |
tmp_data.count = 0;
|
| 457 |
tmp_data.r_count = 0;
|
| 458 |
|
| 459 |
exit:
|
| 460 |
/* Fill in our structure */
|
| 461 |
regex_data->match = tmp_data.match;
|
| 462 |
regex_data->where = tmp_data.where;
|
| 463 |
regex_data->count = tmp_data.count;
|
| 464 |
if (regex_data->match != REGEX_NO_MATCH)
|
| 465 |
/* tmp_data.r_count for __match_wildcard will take care of the
|
| 466 |
* wildcard, and tmp_data.r_count for __match_list will be 0 */
|
| 467 |
regex_data->r_count = r_count + tmp_data.r_count;
|
| 468 |
else
|
| 469 |
regex_data->r_count = 0;
|
| 470 |
|
| 471 |
free (r_list);
|
| 472 |
return 0;
|
| 473 |
|
| 474 |
error:
|
| 475 |
regex_data->match = REGEX_NO_MATCH;
|
| 476 |
|
| 477 |
free (r_list);
|
| 478 |
return -1;
|
| 479 |
}
|
| 480 |
|
| 481 |
/*
 * Check whether 'regex' starts with a wildcard expression, i.e. a character
 * directly followed by '*', '+' or '?'.
 *
 * regex       - the regex to inspect.
 * r_wildcard  - buffer of at least 3 chars.  On success it holds the
 *               character, the operator and a terminating NUL.  When there
 *               is no operator, only elements 0 and 2 are written (both
 *               '\0').  It is left untouched if check_arg_str() rejects
 *               'regex'.
 *
 * Returns the length of the string stored in 'r_wildcard', or 0 if 'regex'
 * does not start with a wildcard expression.
 */
size_t
get_wildcard (const char *regex, char *r_wildcard)
{
    char op;

    if (!check_arg_str (regex))
        return 0;

    r_wildcard[0] = regex[0];
    r_wildcard[2] = '\0';

    op = regex[1];
    if (('*' != op) && ('+' != op) && ('?' != op))
    {
        /* Not followed by an operator - hand back an empty string */
        r_wildcard[0] = '\0';
        return 0;
    }

    r_wildcard[1] = op;

    return strlen (r_wildcard);
}
|
| 504 |
|
| 505 |
int
|
| 506 |
__match_wildcard (regex_data_t * regex_data,
|
| 507 |
int (*match_func) (regex_data_t * regex_data),
|
| 508 |
const char *regex)
|
| 509 |
{
|
| 510 |
regex_data_t tmp_data;
|
| 511 |
char *data_p = regex_data->data;
|
| 512 |
char *wildcard_p = regex_data->regex;
|
| 513 |
char r_wildcard[3];
|
| 514 |
size_t count = 0;
|
| 515 |
size_t r_count = 0;
|
| 516 |
int is_match = 0;
|
| 517 |
int retval;
|
| 518 |
|
| 519 |
CHECK_REGEX_DATA_P (regex_data, exit);
|
| 520 |
|
| 521 |
if (NULL == match_func)
|
| 522 |
{
|
| 523 |
rc_errno_set (EINVAL);
|
| 524 |
DBG_MSG ("NULL match_func was passed!\n");
|
| 525 |
goto exit;
|
| 526 |
}
|
| 527 |
|
| 528 |
r_count = get_wildcard (wildcard_p, r_wildcard);
|
| 529 |
if (0 == r_count)
|
| 530 |
goto exit;
|
| 531 |
|
| 532 |
FILL_REGEX_DATA (tmp_data, data_p, (char *) regex);
|
| 533 |
retval = match_func (&tmp_data);
|
| 534 |
if (-1 == retval)
|
| 535 |
goto error;
|
| 536 |
|
| 537 |
switch (r_wildcard[1])
|
| 538 |
{
|
| 539 |
case '*':
|
| 540 |
case '?':
|
| 541 |
/* '*' and '?' always matches */
|
| 542 |
is_match = 1;
|
| 543 |
case '+':
|
| 544 |
/* We need to match all of them */
|
| 545 |
do
|
| 546 |
{
|
| 547 |
/* If we have at least one match for '+', or none
|
| 548 |
* for '*' or '?', check if we have a word or list match.
|
| 549 |
* We do this because a word weights more than a wildcard */
|
| 550 |
if ((strlen (wildcard_p) > 2)
|
| 551 |
&& ((count > 0)
|
| 552 |
|| ('*' == r_wildcard[1])
|
| 553 |
|| ('?' == r_wildcard[1])))
|
| 554 |
{
|
| 555 |
regex_data_t tmp_data2;
|
| 556 |
#if 0
|
| 557 |
printf ("data_p = %s, wildcard_p = %s\n", data_p, wildcard_p);
|
| 558 |
#endif
|
| 559 |
|
| 560 |
FILL_REGEX_DATA (tmp_data2, data_p, &wildcard_p[2]);
|
| 561 |
retval = match (&tmp_data2);
|
| 562 |
if (-1 == retval)
|
| 563 |
goto error;
|
| 564 |
|
| 565 |
if (
|
| 566 |
/* '.' might be a special case ... */
|
| 567 |
/* ('.' != wildcard_p[2]) && */
|
| 568 |
((REGEX_MATCH (tmp_data2))
|
| 569 |
&& (REGEX_FULL_MATCH == tmp_data2.match)))
|
| 570 |
{
|
| 571 |
goto exit;
|
| 572 |
}
|
| 573 |
}
|
| 574 |
|
| 575 |
if (REGEX_MATCH (tmp_data))
|
| 576 |
{
|
| 577 |
data_p += tmp_data.count;
|
| 578 |
count += tmp_data.count;
|
| 579 |
is_match = 1;
|
| 580 |
|
| 581 |
FILL_REGEX_DATA (tmp_data, data_p, (char *) regex);
|
| 582 |
retval = match_func (&tmp_data);
|
| 583 |
if (-1 == retval)
|
| 584 |
goto error;
|
| 585 |
}
|
| 586 |
/* Only once for '?' */
|
| 587 |
}
|
| 588 |
while ((REGEX_MATCH (tmp_data)) && ('?' != r_wildcard[1]));
|
| 589 |
|
| 590 |
break;
|
| 591 |
default:
|
| 592 |
/* No wildcard */
|
| 593 |
break;
|
| 594 |
}
|
| 595 |
|
| 596 |
exit:
|
| 597 |
/* Fill in our structure */
|
| 598 |
/* We can still have a match ('*' and '?'), although count == 0 */
|
| 599 |
if ((0 == count) && (0 == is_match))
|
| 600 |
regex_data->match = REGEX_NO_MATCH;
|
| 601 |
else if (strlen (regex_data->data) == count)
|
| 602 |
regex_data->match = REGEX_FULL_MATCH;
|
| 603 |
else
|
| 604 |
regex_data->match = REGEX_PARTIAL_MATCH;
|
| 605 |
if (regex_data->match != REGEX_NO_MATCH)
|
| 606 |
regex_data->where = regex_data->data;
|
| 607 |
else
|
| 608 |
regex_data->where = NULL;
|
| 609 |
regex_data->count = count;
|
| 610 |
regex_data->r_count = r_count;
|
| 611 |
|
| 612 |
return 0;
|
| 613 |
|
| 614 |
error:
|
| 615 |
regex_data->match = REGEX_NO_MATCH;
|
| 616 |
|
| 617 |
return -1;
|
| 618 |
}
|
| 619 |
|
| 620 |
int
|
| 621 |
match_wildcard (regex_data_t * regex_data)
|
| 622 |
{
|
| 623 |
regex_data_t tmp_data;
|
| 624 |
char *data_p = regex_data->data;
|
| 625 |
char *wildcard_p = regex_data->regex;
|
| 626 |
char r_wildcard[3];
|
| 627 |
size_t r_count;
|
| 628 |
int retval;
|
| 629 |
|
| 630 |
CHECK_REGEX_DATA_P (regex_data, failed);
|
| 631 |
|
| 632 |
/* Invalid wildcard - we need a character + a regex operator */
|
| 633 |
if (strlen (wildcard_p) < 2)
|
| 634 |
goto failed;
|
| 635 |
|
| 636 |
r_count = get_wildcard (wildcard_p, r_wildcard);
|
| 637 |
if (0 == r_count)
|
| 638 |
goto failed;
|
| 639 |
|
| 640 |
/* Needed so that match_word() will not bail if it sees the wildcard */
|
| 641 |
r_wildcard[1] = '\0';
|
| 642 |
|
| 643 |
FILL_REGEX_DATA (tmp_data, data_p, wildcard_p);
|
| 644 |
retval = __match_wildcard (&tmp_data, match_word, r_wildcard);
|
| 645 |
if (-1 == retval)
|
| 646 |
goto error;
|
| 647 |
if (REGEX_MATCH (tmp_data))
|
| 648 |
goto exit;
|
| 649 |
|
| 650 |
failed:
|
| 651 |
/* We will fill in regex_data below */
|
| 652 |
tmp_data.match = REGEX_NO_MATCH;
|
| 653 |
tmp_data.where = NULL;
|
| 654 |
tmp_data.count = 0;
|
| 655 |
tmp_data.r_count = 0;
|
| 656 |
|
| 657 |
exit:
|
| 658 |
/* Fill in our structure */
|
| 659 |
regex_data->match = tmp_data.match;
|
| 660 |
regex_data->where = tmp_data.where;
|
| 661 |
regex_data->count = tmp_data.count;
|
| 662 |
regex_data->r_count = tmp_data.r_count;
|
| 663 |
|
| 664 |
return 0;
|
| 665 |
|
| 666 |
error:
|
| 667 |
regex_data->match = REGEX_NO_MATCH;
|
| 668 |
|
| 669 |
return -1;
|
| 670 |
}
|
| 671 |
|
| 672 |
int
|
| 673 |
__match (regex_data_t * regex_data)
|
| 674 |
{
|
| 675 |
regex_data_t tmp_data;
|
| 676 |
char *data_p = regex_data->data;
|
| 677 |
char *regex_p = regex_data->regex;
|
| 678 |
size_t count = 0;
|
| 679 |
size_t r_count = 0;
|
| 680 |
int rmatch = 0;
|
| 681 |
int retval;
|
| 682 |
|
| 683 |
CHECK_REGEX_DATA_P (regex_data, failed);
|
| 684 |
|
| 685 |
while (strlen (regex_p) > 0)
|
| 686 |
{
|
| 687 |
#if 0
|
| 688 |
printf ("data_p = '%s', regex_p = '%s'\n", data_p, regex_p);
|
| 689 |
#endif
|
| 690 |
|
| 691 |
FILL_REGEX_DATA (tmp_data, data_p, regex_p);
|
| 692 |
retval = match_list (&tmp_data);
|
| 693 |
if (-1 == retval)
|
| 694 |
goto error;
|
| 695 |
if (REGEX_MATCH (tmp_data))
|
| 696 |
goto have_match;
|
| 697 |
|
| 698 |
FILL_REGEX_DATA (tmp_data, data_p, regex_p);
|
| 699 |
retval = match_wildcard (&tmp_data);
|
| 700 |
if (-1 == retval)
|
| 701 |
goto error;
|
| 702 |
if (REGEX_MATCH (tmp_data))
|
| 703 |
goto have_match;
|
| 704 |
|
| 705 |
FILL_REGEX_DATA (tmp_data, data_p, regex_p);
|
| 706 |
retval = match_word (&tmp_data);
|
| 707 |
if (-1 == retval)
|
| 708 |
goto error;
|
| 709 |
if (REGEX_MATCH (tmp_data))
|
| 710 |
goto have_match;
|
| 711 |
|
| 712 |
break;
|
| 713 |
|
| 714 |
have_match:
|
| 715 |
data_p += tmp_data.count;
|
| 716 |
count += tmp_data.count;
|
| 717 |
regex_p += tmp_data.r_count;
|
| 718 |
r_count += tmp_data.r_count;
|
| 719 |
rmatch = 1;
|
| 720 |
|
| 721 |
/* Check that we do not go out of bounds */
|
| 722 |
if (((data_p - regex_data->data) > strlen (regex_data->data))
|
| 723 |
|| ((regex_p - regex_data->regex) > strlen (regex_data->regex)))
|
| 724 |
goto failed;
|
| 725 |
}
|
| 726 |
|
| 727 |
/* We could not match the whole regex (data too short?) */
|
| 728 |
if (0 != strlen (regex_p))
|
| 729 |
goto failed;
|
| 730 |
|
| 731 |
goto exit;
|
| 732 |
|
| 733 |
failed:
|
| 734 |
/* We will fill in regex_data below */
|
| 735 |
count = 0;
|
| 736 |
r_count = 0;
|
| 737 |
rmatch = 0;
|
| 738 |
|
| 739 |
exit:
|
| 740 |
/* Fill in our structure */
|
| 741 |
/* We can still have a match ('*' and '?'), although count == 0 */
|
| 742 |
if ((0 == count) && (0 == rmatch))
|
| 743 |
regex_data->match = REGEX_NO_MATCH;
|
| 744 |
else if (strlen (regex_data->data) == count)
|
| 745 |
regex_data->match = REGEX_FULL_MATCH;
|
| 746 |
else
|
| 747 |
regex_data->match = REGEX_PARTIAL_MATCH;
|
| 748 |
if (regex_data->match != REGEX_NO_MATCH)
|
| 749 |
regex_data->where = regex_data->data;
|
| 750 |
else
|
| 751 |
regex_data->where = NULL;
|
| 752 |
regex_data->count = count;
|
| 753 |
regex_data->r_count = r_count;
|
| 754 |
|
| 755 |
return 0;
|
| 756 |
|
| 757 |
error:
|
| 758 |
regex_data->match = REGEX_NO_MATCH;
|
| 759 |
|
| 760 |
return -1;
|
| 761 |
}
|
| 762 |
|
| 763 |
int
|
| 764 |
match (regex_data_t * regex_data)
|
| 765 |
{
|
| 766 |
regex_data_t tmp_data;
|
| 767 |
char *data_p = regex_data->data;
|
| 768 |
char *regex_p;
|
| 769 |
char *buf = NULL;
|
| 770 |
int from_start = 0;
|
| 771 |
int to_end = 0;
|
| 772 |
int retval;
|
| 773 |
|
| 774 |
CHECK_REGEX_DATA_P (regex_data, failed);
|
| 775 |
|
| 776 |
/* We might be modifying regex_p, so make a copy */
|
| 777 |
buf = xstrndup (regex_data->regex, strlen (regex_data->regex));
|
| 778 |
if (NULL == buf)
|
| 779 |
goto error;
|
| 780 |
|
| 781 |
regex_p = buf;
|
| 782 |
|
| 783 |
/* Should we only match from the start? */
|
| 784 |
if ('^' == regex_p[0])
|
| 785 |
{
|
| 786 |
regex_p++;
|
| 787 |
from_start = 1;
|
| 788 |
}
|
| 789 |
|
| 790 |
/* Should we match up to the end? */
|
| 791 |
if ('$' == regex_p[strlen (regex_p) - 1])
|
| 792 |
{
|
| 793 |
regex_p[strlen (regex_p) - 1] = '\0';
|
| 794 |
to_end = 1;
|
| 795 |
}
|
| 796 |
|
| 797 |
do
|
| 798 |
{
|
| 799 |
FILL_REGEX_DATA (tmp_data, data_p, regex_p);
|
| 800 |
retval = __match (&tmp_data);
|
| 801 |
if (-1 == retval)
|
| 802 |
goto error;
|
| 803 |
}
|
| 804 |
while ((strlen (data_p++) > 0)
|
| 805 |
&& (!REGEX_MATCH (tmp_data)) && (0 == from_start));
|
| 806 |
|
| 807 |
/* Compensate for above extra inc */
|
| 808 |
data_p--;
|
| 809 |
|
| 810 |
/* Fill in our structure */
|
| 811 |
if (REGEX_MATCH (tmp_data))
|
| 812 |
{
|
| 813 |
/* Check if we had an '$' at the end of the regex, and
|
| 814 |
* verify that we still have a match */
|
| 815 |
if ((1 == to_end) && (tmp_data.count != strlen (data_p)))
|
| 816 |
{
|
| 817 |
goto failed;
|
| 818 |
}
|
| 819 |
|
| 820 |
if ((data_p == regex_data->data)
|
| 821 |
&& (tmp_data.match == REGEX_FULL_MATCH))
|
| 822 |
regex_data->match = REGEX_FULL_MATCH;
|
| 823 |
else
|
| 824 |
regex_data->match = REGEX_PARTIAL_MATCH;
|
| 825 |
regex_data->where = data_p;
|
| 826 |
regex_data->count = tmp_data.count;
|
| 827 |
regex_data->r_count = tmp_data.r_count;
|
| 828 |
if (1 == from_start)
|
| 829 |
regex_data->r_count++;
|
| 830 |
if (1 == to_end)
|
| 831 |
regex_data->r_count++;
|
| 832 |
}
|
| 833 |
else
|
| 834 |
{
|
| 835 |
failed:
|
| 836 |
regex_data->match = REGEX_NO_MATCH;
|
| 837 |
regex_data->where = NULL;
|
| 838 |
regex_data->count = 0;
|
| 839 |
regex_data->r_count = 0;
|
| 840 |
}
|
| 841 |
|
| 842 |
free (buf);
|
| 843 |
|
| 844 |
return 0;
|
| 845 |
|
| 846 |
error:
|
| 847 |
regex_data->match = REGEX_NO_MATCH;
|
| 848 |
free (buf);
|
| 849 |
|
| 850 |
return -1;
|
| 851 |
}
|