From baf23fafb044b5c552c1da6a22ed8a6c3e1dba08 Mon Sep 17 00:00:00 2001 From: c9s Date: Sun, 18 May 2014 20:08:43 +0800 Subject: [PATCH] fix pattern matching for /user/{id}-{user} --- include/r3_define.h | 2 +- include/r3_str.h | 2 +- src/edge.c | 2 +- src/node.c | 52 +++++++++++------------------------ src/str.c | 66 +++++++++++++-------------------------------- tests/bench_str.csv | 61 +++++++++++++++++++++++++++++++++++++++++ tests/check_tree.c | 19 ++++++++++--- 7 files changed, 113 insertions(+), 91 deletions(-) diff --git a/include/r3_define.h b/include/r3_define.h index 8658008..bee91da 100644 --- a/include/r3_define.h +++ b/include/r3_define.h @@ -18,7 +18,7 @@ typedef unsigned char bool; # define TRUE 1 #endif -#define DEBUG 1 +// #define DEBUG 1 #ifdef DEBUG #define info(fmt, ...) \ diff --git a/include/r3_str.h b/include/r3_str.h index c55dd38..9115b0e 100644 --- a/include/r3_str.h +++ b/include/r3_str.h @@ -20,7 +20,7 @@ char * compile_slug(char * str, int len); bool contains_slug(char * str); -char * find_slug_pattern(char *s1); +char * find_slug_pattern(char *s1, int *len); char * find_slug_placeholder(char *s1, int *len); diff --git a/src/edge.c b/src/edge.c index b034d78..043692f 100644 --- a/src/edge.c +++ b/src/edge.c @@ -40,7 +40,7 @@ edge * r3_edge_create(char * pattern, int pattern_len, node * child) { * * * A -> [prefix..suffix] -> B - * A -> [prefix] -> C -> [suffix] -> B + * A -> [prefix] -> B -> [suffix] -> New Child (Copy Data, Edges from B) * */ node * r3_edge_branch(edge *e, int dl) { diff --git a/src/node.c b/src/node.c index 77978cf..232ef41 100644 --- a/src/node.c +++ b/src/node.c @@ -153,11 +153,13 @@ void r3_tree_compile_patterns(node * n) { strncat(p++,")", 1); } - if ( i + 1 < n->edge_len ) { + if ( i + 1 < n->edge_len && n->edge_len > 1 ) { strncat(p++,"|",1); } } + info("pattern: %s\n",cpat); + n->ov_cnt = (1 + n->edge_len) * 3; n->ov = (int*) calloc(sizeof(int), n->ov_cnt); @@ -227,7 +229,7 @@ node * 
r3_tree_match_with_entry(node * n, match_entry * entry) { * @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result. */ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { - // info("try matching: %s\n", path); + info("try matching: %s\n", path); edge *e; int rc; @@ -236,7 +238,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { // if the pcre_pattern is found, and the pointer is not NULL, then it's // pcre pattern node, we use pcre_exec to match the nodes if (n->pcre_pattern) { - // info("pcre matching %s on %s\n", n->combined_pattern, path); + info("pcre matching %s on %s\n", n->combined_pattern, path); rc = pcre_exec( n->pcre_pattern, /* the compiled pattern */ @@ -264,6 +266,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { return NULL; } + for (i = 1; i < rc; i++) { char *substring_start = path + n->ov[2*i]; @@ -271,7 +274,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { // info("%2d: %.*s\n", i, substring_length, substring_start); if ( substring_length > 0) { - int restlen = path_len - n->ov[2*i+1]; // fully match to the end + int restlen = path_len - n->ov[1]; // fully match to the end // info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i); e = n->edges[i - 1]; @@ -283,7 +286,8 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { if (restlen == 0) { return e->child; } - return r3_tree_match( e->child, substring_start + substring_length, restlen, entry); + // get the length of original string: $0 + return r3_tree_match( e->child, path + (n->ov[1] - n->ov[0]), restlen, entry); } } // does not match @@ -323,7 +327,7 @@ inline edge * r3_node_find_edge_str(node * n, char * str, int str_len) { } } - // info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); + info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); 
if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) { return n->edges[matched_idx]; } @@ -472,31 +476,10 @@ node * r3_tree_insert_pathl(node *tree, char *path, int path_len, route * route, /* it's partially matched with the pattern, * we should split the end point and make a branch here... */ - node *c2; // child 1, child 2 - edge *e2; // edge 1, edge 2 char * s2 = path + prefix_len; - int s2_len = 0; - + int s2_len = path_len - prefix_len; r3_edge_branch(e, prefix_len); - // return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data); - - // here is the new edge from. - c2 = r3_tree_create(3); - s2_len = path_len - prefix_len; - e2 = r3_edge_create(strndup(s2, s2_len), s2_len, c2); - // printf("edge right: %s\n", e2->pattern); - r3_node_append_edge(e->child, e2); - - // move n->edges to c1 - c2->endpoint++; - c2->data = data; - if (route) { - route->data = data; - r3_node_append_route(c2, route); - } - return c2; - /* - */ + return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data); } else { printf("unexpected route."); return NULL; @@ -519,6 +502,7 @@ bool r3_node_has_slug_edges(node *n) { void r3_tree_dump(node * n, int level) { + print_indent(level); if ( n->combined_pattern ) { printf(" regexp:%s", n->combined_pattern); } @@ -532,15 +516,11 @@ void r3_tree_dump(node * n, int level) { for ( int i = 0 ; i < n->edge_len ; i++ ) { edge * e = n->edges[i]; - print_indent(level); - printf(" |-\"%s\"", e->pattern); - - if (e->has_slug) { - printf(" slug:"); - printf("%s", compile_slug(e->pattern, e->pattern_len) ); - } + print_indent(level + 1); + printf("|-\"%s\"", e->pattern); if ( e->child ) { + printf("\n"); r3_tree_dump( e->child, level + 1); } printf("\n"); diff --git a/src/str.c b/src/str.c index bf9ebb3..0057dbb 100644 --- a/src/str.c +++ b/src/str.c @@ -113,7 +113,7 @@ char * find_slug_placeholder(char *s1, int *len) { /** * given a slug string, duplicate the pattern string of the slug */ -char * 
find_slug_pattern(char *s1) { +char * find_slug_pattern(char *s1, int *len) { char *c; char *s2; int cnt = 1; @@ -134,8 +134,8 @@ char * find_slug_pattern(char *s1) { } else { return NULL; } - int len = s2 - c; - return strndup(c, len); + *len = s2 - c; + return c; } @@ -144,75 +144,45 @@ char * find_slug_pattern(char *s1) { */ char * compile_slug(char * str, int len) { - char *s1 = NULL, *s2 = NULL, *o = NULL; + char *s1 = NULL, *o = NULL; char *pat = NULL; char sep = '/'; - // find '{' - s1 = strchr(str, '{'); + + // append prefix + int s1_len; + s1 = find_slug_placeholder(str, &s1_len); if ( s1 == NULL ) { return strdup(str); } - if ( (s1 - str) > 0 ) { - sep = *(s1-1); - } - char * out = NULL; - if ((out = calloc(sizeof(char),128)) == NULL) { + if ((out = calloc(sizeof(char),200)) == NULL) { return (NULL); } - // append prefix o = out; - strncat(o, str, s1 - str); + strncat(o, str, s1 - str); // string before slug o += (s1 - str); - // start after ':' - if ( NULL != (pat = strchr(s1, ':')) ) { - pat++; - // find closing '}' - int cnt = 1; - s2 = pat; - while(s2) { - if (*s2 == '{' ) - cnt++; - else if (*s2 == '}' ) - cnt--; - - if (cnt == 0) - break; - s2++; - } - - // this slug contains a pattern - // s2 = strchr(pat, '}'); + int pat_len; + pat = find_slug_pattern(s1, &pat_len); + if (pat) { *o = '('; o++; - - strncat(o, pat, (s2 - pat) ); - o += (s2 - pat); - + strncat(o, pat, pat_len ); + o += pat_len; *o = ')'; o++; - } else { - // should return a '[^/]+' pattern - // strncat(c, "([^%c]+)", strlen("([^%c]+)") ); - // snprintf(pat, 128, "([^%c]+)", sep); sprintf(o, "([^%c]+)", sep); - o+= sizeof("([^%c]+)"); - } - - s2++; - while( (s2 - str) > len ) { - *o = *s2; - s2++; - o++; + o+= strlen("([^*]+)"); } + s1 += s1_len; + strncat(o, s1, strlen(s1)); return out; } diff --git a/tests/bench_str.csv b/tests/bench_str.csv index 98c5182..a8d477e 100644 --- a/tests/bench_str.csv +++ b/tests/bench_str.csv @@ -252,3 +252,64 @@ 1400411322,10574819.37 
1400411340,10536563.80 1400411381,10703727.13 +1400411406,10814145.96 +1400411717,10680938.12 +1400411829,11149498.96 +1400411833,11062632.01 +1400411856,9571612.03 +1400411876,11221957.84 +1400411895,10599710.42 +1400411903,10817749.52 +1400412670,10728801.32 +1400412684,10962187.64 +1400412708,11267224.66 +1400412723,10857559.01 +1400412770,8906644.57 +1400412827,10953246.38 +1400412838,10923438.51 +1400412848,11015834.62 +1400412895,11344942.77 +1400412944,10841369.57 +1400412949,11040353.77 +1400412961,11156072.62 +1400412966,10831108.08 +1400412981,10884440.74 +1400413003,10862551.12 +1400413012,10582158.17 +1400413058,10546292.20 +1400413092,10922604.09 +1400413230,11067709.38 +1400413269,10410991.73 +1400413317,10980282.65 +1400413354,10964929.24 +1400413388,10650346.91 +1400413435,11113745.92 +1400413458,11146293.04 +1400413550,10472731.92 +1400413559,11177595.40 +1400413586,10852453.55 +1400413660,10108857.97 +1400413696,10929343.81 +1400413713,10824792.50 +1400413729,10115599.85 +1400413766,10973125.90 +1400413779,9519723.81 +1400413806,10690956.88 +1400413819,11268613.09 +1400414037,11204556.58 +1400414053,10782873.08 +1400414061,10921441.80 +1400414081,11191230.95 +1400414123,10777241.27 +1400414133,11087850.62 +1400414141,10921616.22 +1400414173,11040258.84 +1400414317,11319968.07 +1400414342,10822736.73 +1400414355,11015188.51 +1400414389,8485410.70 +1400414457,11241764.95 +1400414479,11088645.99 +1400414501,10750962.96 +1400414556,11007510.49 +1400414587,10903071.42 diff --git a/tests/check_tree.c b/tests/check_tree.c index 8355688..c7c5bd1 100644 --- a/tests/check_tree.c +++ b/tests/check_tree.c @@ -161,15 +161,26 @@ START_TEST (test_compile_slug) END_TEST -START_TEST (test_r3_tree_pcre_patterns_insert) +START_TEST (test_pcre_patterns_insert) { node * n = r3_tree_create(10); // r3_tree_insert_path(n, "/foo-{user}-{id}", NULL, NULL); // r3_tree_dump(n, 0); - r3_tree_insert_pathl(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}"), NULL, NULL); + 
r3_tree_insert_pathl(n, "/post/{handle:\\d+}-{id:\\d+}", strlen("/post/{handle:\\d+}-{id:\\d+}"), NULL, NULL); r3_tree_compile(n); r3_tree_dump(n, 0); + + node *matched; + matched = r3_tree_match(n, "/post/111-222", strlen("/post/111-222"), NULL); + ck_assert(matched); + ck_assert_int_gt(matched->endpoint, 0); + + // incomplete string shouldn't match + matched = r3_tree_match(n, "/post/111-", strlen("/post/111-"), NULL); + ck_assert(matched); + ck_assert_int_eq(matched->endpoint, 0); + r3_tree_free(n); } END_TEST @@ -745,10 +756,10 @@ Suite* r3_suite (void) { tcase_add_test(tcase, test_insert_route); tcase_add_test(tcase, test_pcre_pattern_simple); tcase_add_test(tcase, test_pcre_pattern_more); - tcase_add_test(tcase, test_r3_tree_pcre_patterns_insert); + tcase_add_test(tcase, test_pcre_patterns_insert); - tcase_add_test(tcase, benchmark_str); + // tcase_add_test(tcase, benchmark_str); suite_add_tcase(suite, tcase);