KMP模式匹配

960阅读 0评论2012-10-02 810
分类:C/C++


点击(此处)折叠或打开

  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4. #include <assert.h>

  5. int *compute_prefix(const char *pattern) {
  6.     int *prefix;
  7.     int i, k, size;

  8.     assert(pattern);
  9.     size = strlen(pattern);
  10.     prefix = malloc(size * sizeof(int));
  11.     memset(prefix, '\0', size * sizeof(int));
  12.     for (i = 1; i < size; i++) {
  13.         k = prefix[i-1];
  14.         while (1) {
  15.             if (pattern[i] == pattern[k]) {
  16.                 prefix[i] = k+1;
  17.                 break;
  18.             }
  19.             if (k == 0) break;
  20.             k = prefix[k-1];
  21.         }
  22.     }
  23.     return prefix;
  24. }

  25. static int *prefix = NULL;
  26. static char *pattern = NULL;
  27. static int *get_prefix(const char *text) {
  28.     if (!prefix || strcmp(text, pattern)!=0) {
  29.         pattern = text;
  30.         prefix = compute_prefix(text);
  31.     }
  32.     return prefix;
  33. }

  34. const char *kmp_matcher(const char *text, const char* pattern) {
  35.     int i, *prefix;
  36.     const char *tptr, *pptr, *ptr=NULL;

  37.     assert(text);
  38.     assert(pattern);
  39.     tptr = text;
  40.     pptr = pattern;
  41.     prefix = get_prefix(pattern);
  42.     while (1) {
  43.         for(i=0; *tptr++==*pptr++ && *tptr!='\0' && *pptr!='\0'; i++);
  44.         if (*pptr == '\0') {
  45.             ptr = tptr-i-1;
  46.             break;
  47.         }
  48.         else if (*tptr == '\0')
  49.             break;
  50.         pptr = (i==0) ? pattern : pattern+prefix[i-1];
  51.     }
  52.     return ptr;
  53. }

  54. int main(int argc, char *argv[]) {
  55.     char *input;
  56.     int i, *lens;
  57.     if (argc == 1) {
  58.         printf("empty input\n");
  59.         return 0;
  60.     }
  61.     input = argv[1];
  62.     lens = compute_prefix(input);
  63.     printf("%s\n", input);
  64.     for(i = 0; i < strlen(input); i++) {
  65.         printf("%d", lens[i]);
  66.     }
  67.     char *pattern = "aacd";
  68.     char *pos = kmp_matcher("aabbaacdielsoaacd", pattern);
  69.     putchar('\n');
  70.     printf("%s\n", pos);
  71.     pos = kmp_matcher(pos+4, pattern);
  72.     putchar('\n');
  73.     printf("%s\n", pos);
  74.     return 0;
  75. }
 test.zip   
上一篇:正则表达式相关
下一篇:gcc工具集使用备忘