fix pattern matching for /user/{id}-{user}
This commit is contained in:
parent
f039cc3c8f
commit
baf23fafb0
7 changed files with 113 additions and 91 deletions
|
@ -18,7 +18,7 @@ typedef unsigned char bool;
|
||||||
# define TRUE 1
|
# define TRUE 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define DEBUG 1
|
// #define DEBUG 1
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
|
||||||
#define info(fmt, ...) \
|
#define info(fmt, ...) \
|
||||||
|
|
|
@ -20,7 +20,7 @@ char * compile_slug(char * str, int len);
|
||||||
|
|
||||||
bool contains_slug(char * str);
|
bool contains_slug(char * str);
|
||||||
|
|
||||||
char * find_slug_pattern(char *s1);
|
char * find_slug_pattern(char *s1, int *len);
|
||||||
|
|
||||||
char * find_slug_placeholder(char *s1, int *len);
|
char * find_slug_placeholder(char *s1, int *len);
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ edge * r3_edge_create(char * pattern, int pattern_len, node * child) {
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* A -> [prefix..suffix] -> B
|
* A -> [prefix..suffix] -> B
|
||||||
* A -> [prefix] -> C -> [suffix] -> B
|
* A -> [prefix] -> B -> [suffix] -> New Child (Copy Data, Edges from B)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
node * r3_edge_branch(edge *e, int dl) {
|
node * r3_edge_branch(edge *e, int dl) {
|
||||||
|
|
50
src/node.c
50
src/node.c
|
@ -153,11 +153,13 @@ void r3_tree_compile_patterns(node * n) {
|
||||||
strncat(p++,")", 1);
|
strncat(p++,")", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( i + 1 < n->edge_len ) {
|
if ( i + 1 < n->edge_len && n->edge_len > 1 ) {
|
||||||
strncat(p++,"|",1);
|
strncat(p++,"|",1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info("pattern: %s\n",cpat);
|
||||||
|
|
||||||
n->ov_cnt = (1 + n->edge_len) * 3;
|
n->ov_cnt = (1 + n->edge_len) * 3;
|
||||||
n->ov = (int*) calloc(sizeof(int), n->ov_cnt);
|
n->ov = (int*) calloc(sizeof(int), n->ov_cnt);
|
||||||
|
|
||||||
|
@ -227,7 +229,7 @@ node * r3_tree_match_with_entry(node * n, match_entry * entry) {
|
||||||
* @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result.
|
* @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result.
|
||||||
*/
|
*/
|
||||||
node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
||||||
// info("try matching: %s\n", path);
|
info("try matching: %s\n", path);
|
||||||
|
|
||||||
edge *e;
|
edge *e;
|
||||||
int rc;
|
int rc;
|
||||||
|
@ -236,7 +238,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
||||||
// if the pcre_pattern is found, and the pointer is not NULL, then it's
|
// if the pcre_pattern is found, and the pointer is not NULL, then it's
|
||||||
// pcre pattern node, we use pcre_exec to match the nodes
|
// pcre pattern node, we use pcre_exec to match the nodes
|
||||||
if (n->pcre_pattern) {
|
if (n->pcre_pattern) {
|
||||||
// info("pcre matching %s on %s\n", n->combined_pattern, path);
|
info("pcre matching %s on %s\n", n->combined_pattern, path);
|
||||||
|
|
||||||
rc = pcre_exec(
|
rc = pcre_exec(
|
||||||
n->pcre_pattern, /* the compiled pattern */
|
n->pcre_pattern, /* the compiled pattern */
|
||||||
|
@ -264,6 +266,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (i = 1; i < rc; i++)
|
for (i = 1; i < rc; i++)
|
||||||
{
|
{
|
||||||
char *substring_start = path + n->ov[2*i];
|
char *substring_start = path + n->ov[2*i];
|
||||||
|
@ -271,7 +274,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
||||||
// info("%2d: %.*s\n", i, substring_length, substring_start);
|
// info("%2d: %.*s\n", i, substring_length, substring_start);
|
||||||
|
|
||||||
if ( substring_length > 0) {
|
if ( substring_length > 0) {
|
||||||
int restlen = path_len - n->ov[2*i+1]; // fully match to the end
|
int restlen = path_len - n->ov[1]; // fully match to the end
|
||||||
// info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i);
|
// info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i);
|
||||||
|
|
||||||
e = n->edges[i - 1];
|
e = n->edges[i - 1];
|
||||||
|
@ -283,7 +286,8 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
|
||||||
if (restlen == 0) {
|
if (restlen == 0) {
|
||||||
return e->child;
|
return e->child;
|
||||||
}
|
}
|
||||||
return r3_tree_match( e->child, substring_start + substring_length, restlen, entry);
|
// get the length of original string: $0
|
||||||
|
return r3_tree_match( e->child, path + (n->ov[1] - n->ov[0]), restlen, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// does not match
|
// does not match
|
||||||
|
@ -323,7 +327,7 @@ inline edge * r3_node_find_edge_str(node * n, char * str, int str_len) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) );
|
info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) );
|
||||||
if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) {
|
if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) {
|
||||||
return n->edges[matched_idx];
|
return n->edges[matched_idx];
|
||||||
}
|
}
|
||||||
|
@ -472,31 +476,10 @@ node * r3_tree_insert_pathl(node *tree, char *path, int path_len, route * route,
|
||||||
/* it's partially matched with the pattern,
|
/* it's partially matched with the pattern,
|
||||||
* we should split the end point and make a branch here...
|
* we should split the end point and make a branch here...
|
||||||
*/
|
*/
|
||||||
node *c2; // child 1, child 2
|
|
||||||
edge *e2; // edge 1, edge 2
|
|
||||||
char * s2 = path + prefix_len;
|
char * s2 = path + prefix_len;
|
||||||
int s2_len = 0;
|
int s2_len = path_len - prefix_len;
|
||||||
|
|
||||||
r3_edge_branch(e, prefix_len);
|
r3_edge_branch(e, prefix_len);
|
||||||
// return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data);
|
return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data);
|
||||||
|
|
||||||
// here is the new edge from.
|
|
||||||
c2 = r3_tree_create(3);
|
|
||||||
s2_len = path_len - prefix_len;
|
|
||||||
e2 = r3_edge_create(strndup(s2, s2_len), s2_len, c2);
|
|
||||||
// printf("edge right: %s\n", e2->pattern);
|
|
||||||
r3_node_append_edge(e->child, e2);
|
|
||||||
|
|
||||||
// move n->edges to c1
|
|
||||||
c2->endpoint++;
|
|
||||||
c2->data = data;
|
|
||||||
if (route) {
|
|
||||||
route->data = data;
|
|
||||||
r3_node_append_route(c2, route);
|
|
||||||
}
|
|
||||||
return c2;
|
|
||||||
/*
|
|
||||||
*/
|
|
||||||
} else {
|
} else {
|
||||||
printf("unexpected route.");
|
printf("unexpected route.");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -519,6 +502,7 @@ bool r3_node_has_slug_edges(node *n) {
|
||||||
|
|
||||||
|
|
||||||
void r3_tree_dump(node * n, int level) {
|
void r3_tree_dump(node * n, int level) {
|
||||||
|
print_indent(level);
|
||||||
if ( n->combined_pattern ) {
|
if ( n->combined_pattern ) {
|
||||||
printf(" regexp:%s", n->combined_pattern);
|
printf(" regexp:%s", n->combined_pattern);
|
||||||
}
|
}
|
||||||
|
@ -532,15 +516,11 @@ void r3_tree_dump(node * n, int level) {
|
||||||
|
|
||||||
for ( int i = 0 ; i < n->edge_len ; i++ ) {
|
for ( int i = 0 ; i < n->edge_len ; i++ ) {
|
||||||
edge * e = n->edges[i];
|
edge * e = n->edges[i];
|
||||||
print_indent(level);
|
print_indent(level + 1);
|
||||||
printf("|-\"%s\"", e->pattern);
|
printf("|-\"%s\"", e->pattern);
|
||||||
|
|
||||||
if (e->has_slug) {
|
|
||||||
printf(" slug:");
|
|
||||||
printf("%s", compile_slug(e->pattern, e->pattern_len) );
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( e->child ) {
|
if ( e->child ) {
|
||||||
|
printf("\n");
|
||||||
r3_tree_dump( e->child, level + 1);
|
r3_tree_dump( e->child, level + 1);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
66
src/str.c
66
src/str.c
|
@ -113,7 +113,7 @@ char * find_slug_placeholder(char *s1, int *len) {
|
||||||
/**
|
/**
|
||||||
* given a slug string, duplicate the pattern string of the slug
|
* given a slug string, duplicate the pattern string of the slug
|
||||||
*/
|
*/
|
||||||
char * find_slug_pattern(char *s1) {
|
char * find_slug_pattern(char *s1, int *len) {
|
||||||
char *c;
|
char *c;
|
||||||
char *s2;
|
char *s2;
|
||||||
int cnt = 1;
|
int cnt = 1;
|
||||||
|
@ -134,8 +134,8 @@ char * find_slug_pattern(char *s1) {
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
int len = s2 - c;
|
*len = s2 - c;
|
||||||
return strndup(c, len);
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -144,75 +144,45 @@ char * find_slug_pattern(char *s1) {
|
||||||
*/
|
*/
|
||||||
char * compile_slug(char * str, int len)
|
char * compile_slug(char * str, int len)
|
||||||
{
|
{
|
||||||
char *s1 = NULL, *s2 = NULL, *o = NULL;
|
char *s1 = NULL, *o = NULL;
|
||||||
char *pat = NULL;
|
char *pat = NULL;
|
||||||
char sep = '/';
|
char sep = '/';
|
||||||
|
|
||||||
// find '{'
|
|
||||||
s1 = strchr(str, '{');
|
// append prefix
|
||||||
|
int s1_len;
|
||||||
|
s1 = find_slug_placeholder(str, &s1_len);
|
||||||
|
|
||||||
if ( s1 == NULL ) {
|
if ( s1 == NULL ) {
|
||||||
return strdup(str);
|
return strdup(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( (s1 - str) > 0 ) {
|
|
||||||
sep = *(s1-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
char * out = NULL;
|
char * out = NULL;
|
||||||
if ((out = calloc(sizeof(char),128)) == NULL) {
|
if ((out = calloc(sizeof(char),200)) == NULL) {
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
// append prefix
|
|
||||||
o = out;
|
o = out;
|
||||||
strncat(o, str, s1 - str);
|
strncat(o, str, s1 - str); // string before slug
|
||||||
o += (s1 - str);
|
o += (s1 - str);
|
||||||
|
|
||||||
// start after ':'
|
|
||||||
if ( NULL != (pat = strchr(s1, ':')) ) {
|
|
||||||
pat++;
|
|
||||||
|
|
||||||
// find closing '}'
|
int pat_len;
|
||||||
int cnt = 1;
|
pat = find_slug_pattern(s1, &pat_len);
|
||||||
s2 = pat;
|
|
||||||
while(s2) {
|
|
||||||
if (*s2 == '{' )
|
|
||||||
cnt++;
|
|
||||||
else if (*s2 == '}' )
|
|
||||||
cnt--;
|
|
||||||
|
|
||||||
if (cnt == 0)
|
|
||||||
break;
|
|
||||||
s2++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// this slug contains a pattern
|
|
||||||
// s2 = strchr(pat, '}');
|
|
||||||
|
|
||||||
|
if (pat) {
|
||||||
*o = '(';
|
*o = '(';
|
||||||
o++;
|
o++;
|
||||||
|
strncat(o, pat, pat_len );
|
||||||
strncat(o, pat, (s2 - pat) );
|
o += pat_len;
|
||||||
o += (s2 - pat);
|
|
||||||
|
|
||||||
*o = ')';
|
*o = ')';
|
||||||
o++;
|
o++;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// should return a '[^/]+' pattern
|
|
||||||
// strncat(c, "([^%c]+)", strlen("([^%c]+)") );
|
|
||||||
// snprintf(pat, 128, "([^%c]+)", sep);
|
|
||||||
sprintf(o, "([^%c]+)", sep);
|
sprintf(o, "([^%c]+)", sep);
|
||||||
o+= sizeof("([^%c]+)");
|
o+= strlen("([^*]+)");
|
||||||
}
|
|
||||||
|
|
||||||
s2++;
|
|
||||||
while( (s2 - str) > len ) {
|
|
||||||
*o = *s2;
|
|
||||||
s2++;
|
|
||||||
o++;
|
|
||||||
}
|
}
|
||||||
|
s1 += s1_len;
|
||||||
|
strncat(o, s1, strlen(s1));
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -252,3 +252,64 @@
|
||||||
1400411322,10574819.37
|
1400411322,10574819.37
|
||||||
1400411340,10536563.80
|
1400411340,10536563.80
|
||||||
1400411381,10703727.13
|
1400411381,10703727.13
|
||||||
|
1400411406,10814145.96
|
||||||
|
1400411717,10680938.12
|
||||||
|
1400411829,11149498.96
|
||||||
|
1400411833,11062632.01
|
||||||
|
1400411856,9571612.03
|
||||||
|
1400411876,11221957.84
|
||||||
|
1400411895,10599710.42
|
||||||
|
1400411903,10817749.52
|
||||||
|
1400412670,10728801.32
|
||||||
|
1400412684,10962187.64
|
||||||
|
1400412708,11267224.66
|
||||||
|
1400412723,10857559.01
|
||||||
|
1400412770,8906644.57
|
||||||
|
1400412827,10953246.38
|
||||||
|
1400412838,10923438.51
|
||||||
|
1400412848,11015834.62
|
||||||
|
1400412895,11344942.77
|
||||||
|
1400412944,10841369.57
|
||||||
|
1400412949,11040353.77
|
||||||
|
1400412961,11156072.62
|
||||||
|
1400412966,10831108.08
|
||||||
|
1400412981,10884440.74
|
||||||
|
1400413003,10862551.12
|
||||||
|
1400413012,10582158.17
|
||||||
|
1400413058,10546292.20
|
||||||
|
1400413092,10922604.09
|
||||||
|
1400413230,11067709.38
|
||||||
|
1400413269,10410991.73
|
||||||
|
1400413317,10980282.65
|
||||||
|
1400413354,10964929.24
|
||||||
|
1400413388,10650346.91
|
||||||
|
1400413435,11113745.92
|
||||||
|
1400413458,11146293.04
|
||||||
|
1400413550,10472731.92
|
||||||
|
1400413559,11177595.40
|
||||||
|
1400413586,10852453.55
|
||||||
|
1400413660,10108857.97
|
||||||
|
1400413696,10929343.81
|
||||||
|
1400413713,10824792.50
|
||||||
|
1400413729,10115599.85
|
||||||
|
1400413766,10973125.90
|
||||||
|
1400413779,9519723.81
|
||||||
|
1400413806,10690956.88
|
||||||
|
1400413819,11268613.09
|
||||||
|
1400414037,11204556.58
|
||||||
|
1400414053,10782873.08
|
||||||
|
1400414061,10921441.80
|
||||||
|
1400414081,11191230.95
|
||||||
|
1400414123,10777241.27
|
||||||
|
1400414133,11087850.62
|
||||||
|
1400414141,10921616.22
|
||||||
|
1400414173,11040258.84
|
||||||
|
1400414317,11319968.07
|
||||||
|
1400414342,10822736.73
|
||||||
|
1400414355,11015188.51
|
||||||
|
1400414389,8485410.70
|
||||||
|
1400414457,11241764.95
|
||||||
|
1400414479,11088645.99
|
||||||
|
1400414501,10750962.96
|
||||||
|
1400414556,11007510.49
|
||||||
|
1400414587,10903071.42
|
||||||
|
|
|
|
@ -161,15 +161,26 @@ START_TEST (test_compile_slug)
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
START_TEST (test_r3_tree_pcre_patterns_insert)
|
START_TEST (test_pcre_patterns_insert)
|
||||||
{
|
{
|
||||||
node * n = r3_tree_create(10);
|
node * n = r3_tree_create(10);
|
||||||
|
|
||||||
// r3_tree_insert_path(n, "/foo-{user}-{id}", NULL, NULL);
|
// r3_tree_insert_path(n, "/foo-{user}-{id}", NULL, NULL);
|
||||||
// r3_tree_dump(n, 0);
|
// r3_tree_dump(n, 0);
|
||||||
r3_tree_insert_pathl(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}"), NULL, NULL);
|
r3_tree_insert_pathl(n, "/post/{handle:\\d+}-{id:\\d+}", strlen("/post/{handle:\\d+}-{id:\\d+}"), NULL, NULL);
|
||||||
r3_tree_compile(n);
|
r3_tree_compile(n);
|
||||||
r3_tree_dump(n, 0);
|
r3_tree_dump(n, 0);
|
||||||
|
|
||||||
|
node *matched;
|
||||||
|
matched = r3_tree_match(n, "/post/111-222", strlen("/post/111-222"), NULL);
|
||||||
|
ck_assert(matched);
|
||||||
|
ck_assert_int_gt(matched->endpoint, 0);
|
||||||
|
|
||||||
|
// incomplete string shouldn't match
|
||||||
|
matched = r3_tree_match(n, "/post/111-", strlen("/post/111-"), NULL);
|
||||||
|
ck_assert(matched);
|
||||||
|
ck_assert_int_eq(matched->endpoint, 0);
|
||||||
|
|
||||||
r3_tree_free(n);
|
r3_tree_free(n);
|
||||||
}
|
}
|
||||||
END_TEST
|
END_TEST
|
||||||
|
@ -745,10 +756,10 @@ Suite* r3_suite (void) {
|
||||||
tcase_add_test(tcase, test_insert_route);
|
tcase_add_test(tcase, test_insert_route);
|
||||||
tcase_add_test(tcase, test_pcre_pattern_simple);
|
tcase_add_test(tcase, test_pcre_pattern_simple);
|
||||||
tcase_add_test(tcase, test_pcre_pattern_more);
|
tcase_add_test(tcase, test_pcre_pattern_more);
|
||||||
tcase_add_test(tcase, test_r3_tree_pcre_patterns_insert);
|
tcase_add_test(tcase, test_pcre_patterns_insert);
|
||||||
|
|
||||||
|
|
||||||
tcase_add_test(tcase, benchmark_str);
|
// tcase_add_test(tcase, benchmark_str);
|
||||||
|
|
||||||
suite_add_tcase(suite, tcase);
|
suite_add_tcase(suite, tcase);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue