fix pattern matching for /user/{id}-{user}

This commit is contained in:
c9s 2014-05-18 20:08:43 +08:00
parent f039cc3c8f
commit baf23fafb0
7 changed files with 113 additions and 91 deletions

View file

@ -18,7 +18,7 @@ typedef unsigned char bool;
# define TRUE 1 # define TRUE 1
#endif #endif
#define DEBUG 1 // #define DEBUG 1
#ifdef DEBUG #ifdef DEBUG
#define info(fmt, ...) \ #define info(fmt, ...) \

View file

@ -20,7 +20,7 @@ char * compile_slug(char * str, int len);
bool contains_slug(char * str); bool contains_slug(char * str);
char * find_slug_pattern(char *s1); char * find_slug_pattern(char *s1, int *len);
char * find_slug_placeholder(char *s1, int *len); char * find_slug_placeholder(char *s1, int *len);

View file

@ -40,7 +40,7 @@ edge * r3_edge_create(char * pattern, int pattern_len, node * child) {
* *
* *
* A -> [prefix..suffix] -> B * A -> [prefix..suffix] -> B
* A -> [prefix] -> C -> [suffix] -> B * A -> [prefix] -> B -> [suffix] -> New Child (Copy Data, Edges from B)
* *
*/ */
node * r3_edge_branch(edge *e, int dl) { node * r3_edge_branch(edge *e, int dl) {

View file

@ -153,11 +153,13 @@ void r3_tree_compile_patterns(node * n) {
strncat(p++,")", 1); strncat(p++,")", 1);
} }
if ( i + 1 < n->edge_len ) { if ( i + 1 < n->edge_len && n->edge_len > 1 ) {
strncat(p++,"|",1); strncat(p++,"|",1);
} }
} }
info("pattern: %s\n",cpat);
n->ov_cnt = (1 + n->edge_len) * 3; n->ov_cnt = (1 + n->edge_len) * 3;
n->ov = (int*) calloc(sizeof(int), n->ov_cnt); n->ov = (int*) calloc(sizeof(int), n->ov_cnt);
@ -227,7 +229,7 @@ node * r3_tree_match_with_entry(node * n, match_entry * entry) {
* @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result. * @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result.
*/ */
node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) { node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
// info("try matching: %s\n", path); info("try matching: %s\n", path);
edge *e; edge *e;
int rc; int rc;
@ -236,7 +238,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
// if the pcre_pattern is found, and the pointer is not NULL, then it's // if the pcre_pattern is found, and the pointer is not NULL, then it's
// pcre pattern node, we use pcre_exec to match the nodes // pcre pattern node, we use pcre_exec to match the nodes
if (n->pcre_pattern) { if (n->pcre_pattern) {
// info("pcre matching %s on %s\n", n->combined_pattern, path); info("pcre matching %s on %s\n", n->combined_pattern, path);
rc = pcre_exec( rc = pcre_exec(
n->pcre_pattern, /* the compiled pattern */ n->pcre_pattern, /* the compiled pattern */
@ -264,6 +266,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
return NULL; return NULL;
} }
for (i = 1; i < rc; i++) for (i = 1; i < rc; i++)
{ {
char *substring_start = path + n->ov[2*i]; char *substring_start = path + n->ov[2*i];
@ -271,7 +274,7 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
// info("%2d: %.*s\n", i, substring_length, substring_start); // info("%2d: %.*s\n", i, substring_length, substring_start);
if ( substring_length > 0) { if ( substring_length > 0) {
int restlen = path_len - n->ov[2*i+1]; // fully match to the end int restlen = path_len - n->ov[1]; // fully match to the end
// info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i); // info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i);
e = n->edges[i - 1]; e = n->edges[i - 1];
@ -283,7 +286,8 @@ node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
if (restlen == 0) { if (restlen == 0) {
return e->child; return e->child;
} }
return r3_tree_match( e->child, substring_start + substring_length, restlen, entry); // get the length of original string: $0
return r3_tree_match( e->child, path + (n->ov[1] - n->ov[0]), restlen, entry);
} }
} }
// does not match // does not match
@ -323,7 +327,7 @@ inline edge * r3_node_find_edge_str(node * n, char * str, int str_len) {
} }
} }
// info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) );
if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) { if ( strncmp( node_edge_pattern(n,matched_idx), str, node_edge_pattern_len(n,matched_idx) ) == 0 ) {
return n->edges[matched_idx]; return n->edges[matched_idx];
} }
@ -472,31 +476,10 @@ node * r3_tree_insert_pathl(node *tree, char *path, int path_len, route * route,
/* it's partially matched with the pattern, /* it's partially matched with the pattern,
* we should split the end point and make a branch here... * we should split the end point and make a branch here...
*/ */
node *c2; // child 1, child 2
edge *e2; // edge 1, edge 2
char * s2 = path + prefix_len; char * s2 = path + prefix_len;
int s2_len = 0; int s2_len = path_len - prefix_len;
r3_edge_branch(e, prefix_len); r3_edge_branch(e, prefix_len);
// return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data); return r3_tree_insert_pathl(e->child, s2 , s2_len, route , data);
// here is the new edge from.
c2 = r3_tree_create(3);
s2_len = path_len - prefix_len;
e2 = r3_edge_create(strndup(s2, s2_len), s2_len, c2);
// printf("edge right: %s\n", e2->pattern);
r3_node_append_edge(e->child, e2);
// move n->edges to c1
c2->endpoint++;
c2->data = data;
if (route) {
route->data = data;
r3_node_append_route(c2, route);
}
return c2;
/*
*/
} else { } else {
printf("unexpected route."); printf("unexpected route.");
return NULL; return NULL;
@ -519,6 +502,7 @@ bool r3_node_has_slug_edges(node *n) {
void r3_tree_dump(node * n, int level) { void r3_tree_dump(node * n, int level) {
print_indent(level);
if ( n->combined_pattern ) { if ( n->combined_pattern ) {
printf(" regexp:%s", n->combined_pattern); printf(" regexp:%s", n->combined_pattern);
} }
@ -532,15 +516,11 @@ void r3_tree_dump(node * n, int level) {
for ( int i = 0 ; i < n->edge_len ; i++ ) { for ( int i = 0 ; i < n->edge_len ; i++ ) {
edge * e = n->edges[i]; edge * e = n->edges[i];
print_indent(level); print_indent(level + 1);
printf("|-\"%s\"", e->pattern); printf("|-\"%s\"", e->pattern);
if (e->has_slug) {
printf(" slug:");
printf("%s", compile_slug(e->pattern, e->pattern_len) );
}
if ( e->child ) { if ( e->child ) {
printf("\n");
r3_tree_dump( e->child, level + 1); r3_tree_dump( e->child, level + 1);
} }
printf("\n"); printf("\n");

View file

@ -113,7 +113,7 @@ char * find_slug_placeholder(char *s1, int *len) {
/** /**
* given a slug string, duplicate the pattern string of the slug * given a slug string, duplicate the pattern string of the slug
*/ */
char * find_slug_pattern(char *s1) { char * find_slug_pattern(char *s1, int *len) {
char *c; char *c;
char *s2; char *s2;
int cnt = 1; int cnt = 1;
@ -134,8 +134,8 @@ char * find_slug_pattern(char *s1) {
} else { } else {
return NULL; return NULL;
} }
int len = s2 - c; *len = s2 - c;
return strndup(c, len); return c;
} }
@ -144,75 +144,45 @@ char * find_slug_pattern(char *s1) {
*/ */
char * compile_slug(char * str, int len) char * compile_slug(char * str, int len)
{ {
char *s1 = NULL, *s2 = NULL, *o = NULL; char *s1 = NULL, *o = NULL;
char *pat = NULL; char *pat = NULL;
char sep = '/'; char sep = '/';
// find '{'
s1 = strchr(str, '{'); // append prefix
int s1_len;
s1 = find_slug_placeholder(str, &s1_len);
if ( s1 == NULL ) { if ( s1 == NULL ) {
return strdup(str); return strdup(str);
} }
if ( (s1 - str) > 0 ) {
sep = *(s1-1);
}
char * out = NULL; char * out = NULL;
if ((out = calloc(sizeof(char),128)) == NULL) { if ((out = calloc(sizeof(char),200)) == NULL) {
return (NULL); return (NULL);
} }
// append prefix
o = out; o = out;
strncat(o, str, s1 - str); strncat(o, str, s1 - str); // string before slug
o += (s1 - str); o += (s1 - str);
// start after ':'
if ( NULL != (pat = strchr(s1, ':')) ) {
pat++;
// find closing '}' int pat_len;
int cnt = 1; pat = find_slug_pattern(s1, &pat_len);
s2 = pat;
while(s2) {
if (*s2 == '{' )
cnt++;
else if (*s2 == '}' )
cnt--;
if (cnt == 0)
break;
s2++;
}
// this slug contains a pattern
// s2 = strchr(pat, '}');
if (pat) {
*o = '('; *o = '(';
o++; o++;
strncat(o, pat, pat_len );
strncat(o, pat, (s2 - pat) ); o += pat_len;
o += (s2 - pat);
*o = ')'; *o = ')';
o++; o++;
} else { } else {
// should return a '[^/]+' pattern
// strncat(c, "([^%c]+)", strlen("([^%c]+)") );
// snprintf(pat, 128, "([^%c]+)", sep);
sprintf(o, "([^%c]+)", sep); sprintf(o, "([^%c]+)", sep);
o+= sizeof("([^%c]+)"); o+= strlen("([^*]+)");
}
s2++;
while( (s2 - str) > len ) {
*o = *s2;
s2++;
o++;
} }
s1 += s1_len;
strncat(o, s1, strlen(s1));
return out; return out;
} }

View file

@ -252,3 +252,64 @@
1400411322,10574819.37 1400411322,10574819.37
1400411340,10536563.80 1400411340,10536563.80
1400411381,10703727.13 1400411381,10703727.13
1400411406,10814145.96
1400411717,10680938.12
1400411829,11149498.96
1400411833,11062632.01
1400411856,9571612.03
1400411876,11221957.84
1400411895,10599710.42
1400411903,10817749.52
1400412670,10728801.32
1400412684,10962187.64
1400412708,11267224.66
1400412723,10857559.01
1400412770,8906644.57
1400412827,10953246.38
1400412838,10923438.51
1400412848,11015834.62
1400412895,11344942.77
1400412944,10841369.57
1400412949,11040353.77
1400412961,11156072.62
1400412966,10831108.08
1400412981,10884440.74
1400413003,10862551.12
1400413012,10582158.17
1400413058,10546292.20
1400413092,10922604.09
1400413230,11067709.38
1400413269,10410991.73
1400413317,10980282.65
1400413354,10964929.24
1400413388,10650346.91
1400413435,11113745.92
1400413458,11146293.04
1400413550,10472731.92
1400413559,11177595.40
1400413586,10852453.55
1400413660,10108857.97
1400413696,10929343.81
1400413713,10824792.50
1400413729,10115599.85
1400413766,10973125.90
1400413779,9519723.81
1400413806,10690956.88
1400413819,11268613.09
1400414037,11204556.58
1400414053,10782873.08
1400414061,10921441.80
1400414081,11191230.95
1400414123,10777241.27
1400414133,11087850.62
1400414141,10921616.22
1400414173,11040258.84
1400414317,11319968.07
1400414342,10822736.73
1400414355,11015188.51
1400414389,8485410.70
1400414457,11241764.95
1400414479,11088645.99
1400414501,10750962.96
1400414556,11007510.49
1400414587,10903071.42

1 1400242718 5649455.80
252 1400411322 10574819.37
253 1400411340 10536563.80
254 1400411381 10703727.13
255 1400411406 10814145.96
256 1400411717 10680938.12
257 1400411829 11149498.96
258 1400411833 11062632.01
259 1400411856 9571612.03
260 1400411876 11221957.84
261 1400411895 10599710.42
262 1400411903 10817749.52
263 1400412670 10728801.32
264 1400412684 10962187.64
265 1400412708 11267224.66
266 1400412723 10857559.01
267 1400412770 8906644.57
268 1400412827 10953246.38
269 1400412838 10923438.51
270 1400412848 11015834.62
271 1400412895 11344942.77
272 1400412944 10841369.57
273 1400412949 11040353.77
274 1400412961 11156072.62
275 1400412966 10831108.08
276 1400412981 10884440.74
277 1400413003 10862551.12
278 1400413012 10582158.17
279 1400413058 10546292.20
280 1400413092 10922604.09
281 1400413230 11067709.38
282 1400413269 10410991.73
283 1400413317 10980282.65
284 1400413354 10964929.24
285 1400413388 10650346.91
286 1400413435 11113745.92
287 1400413458 11146293.04
288 1400413550 10472731.92
289 1400413559 11177595.40
290 1400413586 10852453.55
291 1400413660 10108857.97
292 1400413696 10929343.81
293 1400413713 10824792.50
294 1400413729 10115599.85
295 1400413766 10973125.90
296 1400413779 9519723.81
297 1400413806 10690956.88
298 1400413819 11268613.09
299 1400414037 11204556.58
300 1400414053 10782873.08
301 1400414061 10921441.80
302 1400414081 11191230.95
303 1400414123 10777241.27
304 1400414133 11087850.62
305 1400414141 10921616.22
306 1400414173 11040258.84
307 1400414317 11319968.07
308 1400414342 10822736.73
309 1400414355 11015188.51
310 1400414389 8485410.70
311 1400414457 11241764.95
312 1400414479 11088645.99
313 1400414501 10750962.96
314 1400414556 11007510.49
315 1400414587 10903071.42

View file

@ -161,15 +161,26 @@ START_TEST (test_compile_slug)
END_TEST END_TEST
START_TEST (test_r3_tree_pcre_patterns_insert) START_TEST (test_pcre_patterns_insert)
{ {
node * n = r3_tree_create(10); node * n = r3_tree_create(10);
// r3_tree_insert_path(n, "/foo-{user}-{id}", NULL, NULL); // r3_tree_insert_path(n, "/foo-{user}-{id}", NULL, NULL);
// r3_tree_dump(n, 0); // r3_tree_dump(n, 0);
r3_tree_insert_pathl(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}"), NULL, NULL); r3_tree_insert_pathl(n, "/post/{handle:\\d+}-{id:\\d+}", strlen("/post/{handle:\\d+}-{id:\\d+}"), NULL, NULL);
r3_tree_compile(n); r3_tree_compile(n);
r3_tree_dump(n, 0); r3_tree_dump(n, 0);
node *matched;
matched = r3_tree_match(n, "/post/111-222", strlen("/post/111-222"), NULL);
ck_assert(matched);
ck_assert_int_gt(matched->endpoint, 0);
// incomplete string shouldn't match
matched = r3_tree_match(n, "/post/111-", strlen("/post/111-"), NULL);
ck_assert(matched);
ck_assert_int_eq(matched->endpoint, 0);
r3_tree_free(n); r3_tree_free(n);
} }
END_TEST END_TEST
@ -745,10 +756,10 @@ Suite* r3_suite (void) {
tcase_add_test(tcase, test_insert_route); tcase_add_test(tcase, test_insert_route);
tcase_add_test(tcase, test_pcre_pattern_simple); tcase_add_test(tcase, test_pcre_pattern_simple);
tcase_add_test(tcase, test_pcre_pattern_more); tcase_add_test(tcase, test_pcre_pattern_more);
tcase_add_test(tcase, test_r3_tree_pcre_patterns_insert); tcase_add_test(tcase, test_pcre_patterns_insert);
tcase_add_test(tcase, benchmark_str); // tcase_add_test(tcase, benchmark_str);
suite_add_tcase(suite, tcase); suite_add_tcase(suite, tcase);