diff --git a/bench_str.csv b/bench_str.csv index 7d3901f..84a882c 100644 --- a/bench_str.csv +++ b/bench_str.csv @@ -470,3 +470,12 @@ 1400818725,11160125.46,2574373.69,47127.01 1400818732,10829199.02,2557782.44,67650.06 1400818739,10859734.88,2538368.71,41527.76 +1400820693,12547680.62,2375764.20,55924.05 +1400820703,12815067.19,2375474.47,34379.54 +1400820719,11693810.54,2231143.55,47662.55 +1400820728,12612875.15,2357108.19,49932.19 +1400820868,12158497.75,2598723.80,62601.55 +1400820877,12254639.62,2583601.86,77672.30 +1400820886,12274457.34,2393445.83,55188.21 +1400820922,12218386.22,2604565.56,77672.30 +1400820933,12443155.46,2361317.46,45590.26 diff --git a/include/r3.h b/include/r3.h index 2c3da24..b7d17cd 100644 --- a/include/r3.h +++ b/include/r3.h @@ -99,7 +99,9 @@ void r3_tree_free(node * tree); void r3_edge_free(edge * edge); -edge * r3_node_add_child(node * n, char * pat , node *child); +edge * r3_node_connectl(node * n, char * pat, int len, int strdup, node *child); + +#define r3_node_connect(n, pat, child) r3_node_connectl(n, pat, strlen(pat), 0, child) edge * r3_node_find_edge(node * n, char * pat); @@ -175,7 +177,7 @@ route * r3_tree_match_route(const node *n, match_entry * entry); -int r3_pattern_to_opcode(char * pattern); +int r3_pattern_to_opcode(char * pattern, int pattern_len); enum { NODE_COMPARE_STR, NODE_COMPARE_PCRE, NODE_COMPARE_OPCODE }; diff --git a/include/r3_str.h b/include/r3_str.h index 0775d06..f83bfcf 100644 --- a/include/r3_str.h +++ b/include/r3_str.h @@ -16,9 +16,9 @@ char * slug_compile(char * str, int len); bool contains_slug(char * str); -char * find_slug_pattern(char *s1, int *len); +char * slug_find_pattern(char *s1, int *len); -char * find_slug_placeholder(char *s1, int *len); +char * slug_find_placeholder(char *s1, int *len); char * inside_slug(char * needle, int needle_len, char *offset); diff --git a/src/node.c b/src/node.c index c411f6e..aa2299c 100644 --- a/src/node.c +++ b/src/node.c @@ -83,12 +83,8 @@ void 
r3_tree_free(node * tree) { tree = NULL; } - - -/* parent node, edge pattern, child */ -edge * r3_node_add_child(node * n, char * pat , node *child) { +edge * r3_node_connectl(node * n, char * pat, int len, int dupl, node *child) { // find the same sub-pattern, if it does not exist, create one - edge * e; e = r3_node_find_edge(n, pat); @@ -96,15 +92,14 @@ edge * r3_node_add_child(node * n, char * pat , node *child) { return e; } - e = r3_edge_create( pat, strlen(pat), child); + if (dupl) { + pat = zstrndup(pat, len); + } + e = r3_edge_create(pat, len, child); r3_node_append_edge(n, e); - // str_array_append(n->edge_patterns, pat); - // assert( str_array_len(n->edge_patterns) == n->edge_len ); return e; } - - void r3_node_append_edge(node *n, edge *e) { if (n->edges == NULL) { n->edge_cap = 3; @@ -165,19 +160,11 @@ void r3_tree_compile_patterns(node * n) { p++; edge *e = NULL; - int opcode_cnt = 0; for ( int i = 0 ; i < n->edge_len ; i++ ) { e = n->edges[i]; if ( e->has_slug ) { // compile "foo/{slug}" to "foo/[^/]+" char * slug_pat = slug_compile(e->pattern, e->pattern_len); - - // if found available opcode - e->opcode = r3_pattern_to_opcode(slug_pat); - if (e->opcode) { - opcode_cnt++; - } - strcat(p, slug_pat); } else { strncat(p++,"(", 1); @@ -196,10 +183,12 @@ void r3_tree_compile_patterns(node * n) { info("pattern: %s\n",cpat); // if all edges use opcode, we should skip the combined_pattern. 
+ /* if ( opcode_cnt == n->edge_len ) { zfree(cpat); return; } + */ n->combined_pattern = cpat; @@ -358,24 +347,20 @@ route * r3_tree_match_route(const node *tree, match_entry * entry) { inline edge * r3_node_find_edge_str(const node * n, char * str, int str_len) { int i = 0; - int matched_idx = 0; + int matched_idx = -1; char firstbyte = *str; for (; i < n->edge_len ; i++ ) { if ( firstbyte == *(n->edges[i]->pattern) ) { - matched_idx = i; - break; + info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); + if ( strncmp( node_edge_pattern(n,i), str, node_edge_pattern_len(n,i) ) == 0 ) { + return n->edges[i]; + } + return NULL; } } - - info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); - if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) { - return n->edges[matched_idx]; - } return NULL; } - - node * r3_node_create() { node * n = (node*) zmalloc( sizeof(node) ); n->edges = NULL; @@ -454,17 +439,18 @@ node * r3_tree_insert_pathl_(node *tree, char *path, int path_len, route * route // common prefix not found, insert a new edge for this pattern if ( prefix_len == 0 ) { // there are two more slugs, we should break them into several parts - if ( slug_count(path, path_len) > 1 ) { + int slug_cnt = slug_count(path, path_len); + if ( slug_cnt > 1 ) { int slug_len; - char *p = find_slug_placeholder(path, &slug_len); + char *p = slug_find_placeholder(path, &slug_len); #ifdef DEBUG assert(p); #endif - // find the next one + // find the next one '{', then break there if(p) { - p = find_slug_placeholder(p + slug_len + 1, NULL); + p = slug_find_placeholder(p + slug_len + 1, NULL); } #ifdef DEBUG assert(p); @@ -472,18 +458,62 @@ node * r3_tree_insert_pathl_(node *tree, char *path, int path_len, route * route // insert the first one edge, and break at "p" node * child = r3_tree_create(3); - r3_node_add_child(n, zstrndup(path, (int)(p - path)), child); - child->endpoint = 0; + r3_node_connect(n, zstrndup(path, 
(int)(p - path)), child); // and insert the rest part to the child return r3_tree_insert_pathl_(child, p, path_len - (int)(p - path), route, data); + } else { + if (slug_cnt == 1) { + // there is one slug, let's see if it's optimizable by opcode + int slug_len = 0; + char *slug_p = slug_find_placeholder(path, &slug_len); + int slug_pattern_len = 0; + char *slug_pattern = slug_find_pattern(slug_p, &slug_pattern_len); + int opcode = 0; + // if there is a pattern defined. + if (slug_pattern) { + char *cpattern = slug_compile(slug_pattern, slug_pattern_len); + opcode = r3_pattern_to_opcode(cpattern, strlen(cpattern)); + zfree(cpattern); + } else { + opcode = OP_EXPECT_NOSLASH; + } + // found opcode + if (opcode) { + // if the slug starts after one+ character, for example foo{slug} + node *c1; + if (slug_p > path) { + c1 = r3_tree_create(3); + r3_node_connectl(n, path, slug_p - path, 1, c1); // duplicate + } else { + c1 = n; + } + + node * c2 = r3_tree_create(3); + edge * op_edge = r3_node_connectl(c1, slug_p, slug_len , 1, c2); + op_edge->opcode = opcode; + + // insert rest + int restlen = (path_len - (slug_p - path)) - slug_len; + if (restlen) { + return r3_tree_insert_pathl_(c2, slug_p + slug_len, restlen, route, data); + } + + c2->data = data; + c2->endpoint++; + if (route) { + route->data = data; + r3_node_append_route(c2, route); + } + return c2; + } + } + // only one slug node * child = r3_tree_create(3); - r3_node_add_child(n, zstrndup(path, path_len) , child); - // info("edge not found, insert one: %s\n", path); + r3_node_connect(n, zstrndup(path, path_len) , child); child->data = data; child->endpoint++; - if (route) { route->data = data; r3_node_append_route(child, route); @@ -565,6 +595,10 @@ void r3_tree_dump(node * n, int level) { print_indent(level + 1); printf("|-\"%s\"", e->pattern); + if (e->opcode ) { + printf(" opcode:%d", e->opcode); + } + if ( e->child ) { printf("\n"); r3_tree_dump( e->child, level + 1); diff --git a/src/str.c b/src/str.c index 
2017aaf..8f7e839 100644 --- a/src/str.c +++ b/src/str.c @@ -13,17 +13,17 @@ #include "str_array.h" #include "zmalloc.h" -int r3_pattern_to_opcode(char * pattern) { - if ( strcmp(pattern, "\\w+") == 0 ) { +int r3_pattern_to_opcode(char * pattern, int len) { + if ( strncmp(pattern, "\\w+",len) == 0 ) { return OP_EXPECT_WORDS; } - if ( strcmp(pattern, "\\d+") == 0 ) { + if ( strncmp(pattern, "\\d+", len) == 0 ) { return OP_EXPECT_DIGITS; } - if ( strcmp(pattern, "[^/]+") == 0 ) { + if ( strncmp(pattern, "[^/]+", len) == 0 ) { return OP_EXPECT_NOSLASH; } - if ( strcmp(pattern, "[^-]+") == 0 ) { + if ( strncmp(pattern, "[^-]+", len) == 0 ) { return OP_EXPECT_NODASH; } return 0; @@ -84,7 +84,7 @@ char * inside_slug(char * needle, int needle_len, char *offset) { return NULL; } -char * find_slug_placeholder(char *s1, int *len) { +char * slug_find_placeholder(char *s1, int *len) { char *c; char *s2; int cnt = 0; @@ -116,7 +116,7 @@ char * find_slug_placeholder(char *s1, int *len) { /** * given a slug string, duplicate the pattern string of the slug */ -char * find_slug_pattern(char *s1, int *len) { +char * slug_find_pattern(char *s1, int *len) { char *c; char *s2; int cnt = 1; @@ -154,7 +154,7 @@ char * slug_compile(char * str, int len) // append prefix int s1_len; - s1 = find_slug_placeholder(str, &s1_len); + s1 = slug_find_placeholder(str, &s1_len); if ( s1 == NULL ) { return zstrdup(str); @@ -171,7 +171,7 @@ char * slug_compile(char * str, int len) int pat_len; - pat = find_slug_pattern(s1, &pat_len); + pat = slug_find_pattern(s1, &pat_len); if (pat) { *o = '('; diff --git a/tests/check_slug.c b/tests/check_slug.c index 56d0804..9959d54 100644 --- a/tests/check_slug.c +++ b/tests/check_slug.c @@ -15,10 +15,10 @@ START_TEST (test_pattern_to_opcode) { - ck_assert( r3_pattern_to_opcode("\\w+") == OP_EXPECT_WORDS ); - ck_assert( r3_pattern_to_opcode("\\d+") == OP_EXPECT_DIGITS ); - ck_assert( r3_pattern_to_opcode("[^/]+") == OP_EXPECT_NOSLASH ); - ck_assert( 
r3_pattern_to_opcode("[^-]+") == OP_EXPECT_NODASH ); + ck_assert( r3_pattern_to_opcode("\\w+", strlen("\\w+")) == OP_EXPECT_WORDS ); + ck_assert( r3_pattern_to_opcode("\\d+", strlen("\\d+")) == OP_EXPECT_DIGITS ); + ck_assert( r3_pattern_to_opcode("[^/]+",strlen("[^/]+")) == OP_EXPECT_NOSLASH ); + ck_assert( r3_pattern_to_opcode("[^-]+",strlen("[^-]+")) == OP_EXPECT_NODASH ); } END_TEST @@ -50,24 +50,24 @@ START_TEST (test_contains_slug) } END_TEST -START_TEST (test_find_slug_pattern) +START_TEST (test_slug_find_pattern) { int len; - char * namerex = find_slug_pattern("{name:\\s+}", &len); + char * namerex = slug_find_pattern("{name:\\s+}", &len); ck_assert( strncmp(namerex, "\\s+", len) == 0 ); } END_TEST -START_TEST (test_find_slug_placeholder) +START_TEST (test_slug_find_placeholder) { int slug_len = 0; char * slug; - slug = find_slug_placeholder("/user/{name:\\s+}/to/{id}", &slug_len); + slug = slug_find_placeholder("/user/{name:\\s+}/to/{id}", &slug_len); ck_assert( strncmp(slug, "{name:\\s+}", slug_len) == 0 ); - slug = find_slug_placeholder("/user/{idx:\\d{3}}/to/{idy:\\d{3}}", &slug_len); + slug = slug_find_placeholder("/user/{idx:\\d{3}}/to/{idy:\\d{3}}", &slug_len); ck_assert( slug_len == strlen("{idx:\\d{3}}") ); ck_assert( strncmp(slug, "{idx:\\d{3}}", slug_len) == 0 ); } @@ -95,10 +95,10 @@ START_TEST (test_slug_count) } END_TEST -START_TEST (test_find_slug_placeholder_with_broken_slug) +START_TEST (test_slug_find_placeholder_with_broken_slug) { int slug_len = 0; - char * slug = find_slug_placeholder("/user/{name:\\s+/to/{id", &slug_len); + char * slug = slug_find_placeholder("/user/{name:\\s+/to/{id", &slug_len); ck_assert(! 
slug); } END_TEST @@ -110,9 +110,9 @@ Suite* r3_suite (void) { tcase_set_timeout(tcase, 30); tcase_add_test(tcase, test_contains_slug); tcase_add_test(tcase, test_inside_slug); - tcase_add_test(tcase, test_find_slug_pattern); - tcase_add_test(tcase, test_find_slug_placeholder); - tcase_add_test(tcase, test_find_slug_placeholder_with_broken_slug); + tcase_add_test(tcase, test_slug_find_pattern); + tcase_add_test(tcase, test_slug_find_placeholder); + tcase_add_test(tcase, test_slug_find_placeholder_with_broken_slug); tcase_add_test(tcase, test_slug_count); tcase_add_test(tcase, test_slug_compile); tcase_add_test(tcase, test_pattern_to_opcode); diff --git a/tests/check_tree.c b/tests/check_tree.c index a58192e..91706f1 100644 --- a/tests/check_tree.c +++ b/tests/check_tree.c @@ -32,7 +32,7 @@ START_TEST (test_r3_node_find_edge) node * child = r3_tree_create(3); - fail_if( r3_node_add_child(n, zstrdup("/add") , child) == FALSE ); + fail_if( r3_node_connect(n, zstrdup("/add") , child) == FALSE ); fail_if( r3_node_find_edge(n, "/add") == NULL ); fail_if( r3_node_find_edge(n, "/bar") != NULL );