pcre stuff

This commit is contained in:
c9s 2014-05-16 14:05:51 +08:00
parent fe21914e00
commit 57dce698db
4 changed files with 174 additions and 72 deletions

View file

@ -12,6 +12,7 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pcre.h>
#include "token.h"
@ -21,6 +22,27 @@ typedef struct _redge redge;
typedef struct _rnode rnode;
/* A node of the route tree. Outgoing edges are kept in an array of pointers. */
struct _rnode {
/* outgoing edges; entries 0 .. edge_len-1 are iterated by the tree functions */
redge ** edges;
/* number of entries of edges that are in use */
int edge_len;
/* NOTE(review): presumably the allocated capacity of edges (see rnode_create(int cap)) -- confirm */
int edge_cap;
/* the combined regexp pattern string from pattern_tokens */
char * combined_pattern;
/* length of combined_pattern as computed when the pattern is built */
int combined_pattern_len;
/* result of pcre_compile() on combined_pattern; assigned in rnode_compile_patterns() */
pcre * pcre_pattern;
/* NOTE(review): looks like a flag marking that a route ends at this node -- not shown here, confirm */
int endpoint;
};
/* An edge of the route tree: a pattern string that leads to a child node. */
struct _redge {
/* pattern text carried by this edge */
char * pattern;
/* length of pattern; rnode_find_edge_str() compares against this value */
int pattern_len;
/* when true, rnode_dump() additionally prints compile_slug(pattern, pattern_len) */
bool has_slug;
/* node reached by following this edge */
rnode * child;
};
rnode * rnode_create(int cap);
@ -43,7 +65,14 @@ rnode * rnode_insert_routel(rnode *tree, char *route, int route_len);
void rnode_dump(rnode * n, int level);
void rnode_combine_patterns(rnode * n);
redge * rnode_find_edge_str(rnode * n, char * str, int str_len);
void rnode_compile(rnode *n);
void rnode_compile_patterns(rnode * n);
rnode * rnode_match(rnode * n, char * path, int path_len);
bool rnode_has_slug_edges(rnode *n);

View file

@ -4,8 +4,11 @@ include_directories("${PROJECT_SOURCE_DIR}/include")
set(libr3_SRCS node.c str.c list.c token.c)
# CMAKE_C_FLAGS is the variable CMake hands to the C compiler (CMAKE_CFLAGS is
# not read by CMake, so the flags below were silently ignored). Append rather
# than overwrite so flags supplied on the command line are preserved.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -pipe -g3 -funroll-loops")
# NOTE(review): LIBS ends with r3 and is then linked into r3 itself below;
# newer CMake releases reject a target linking to itself (policy CMP0038).
# Confirm whether r3 is needed in LIBS for other targets before changing it.
set(LIBS ${LIBS} ${PCRE_LIBRARIES} ${Judy_LIBRARIES} ${Jemalloc_LIBRARIES} r3)
# add_library(r3-static STATIC ${libr3_SRCS})
add_library(r3 SHARED ${libr3_SRCS})
target_link_libraries(r3 ${LIBS})
# target_link_libraries(r3 cblas)
# install(FILES ${libswiftnav_HEADERS} DESTINATION include/libswiftnav)

View file

@ -16,24 +16,6 @@
#include "node.h"
#include "token.h"
/* A node of the route tree. Outgoing edges are kept in an array of pointers. */
struct _rnode {
/* outgoing edges; entries 0 .. edge_len-1 are iterated by the tree functions */
redge ** edges;
/* number of entries of edges that are in use */
int edge_len;
/* NOTE(review): presumably the allocated capacity of edges -- confirm */
int edge_cap;
/* the combined regexp pattern string from pattern_tokens */
char * combined_pattern;
/* length of combined_pattern as computed when the pattern is built */
int combined_pattern_len;
/* NOTE(review): looks like a flag marking that a route ends at this node -- not shown here, confirm */
int endpoint;
};
/* An edge of the route tree: a pattern string that leads to a child node. */
struct _redge {
/* pattern text carried by this edge */
char * pattern;
/* length of pattern; rnode_find_edge_str() compares against this value */
int pattern_len;
/* when true, rnode_dump() additionally prints compile_slug(pattern, pattern_len) */
bool has_slug;
/* node reached by following this edge */
rnode * child;
};
// String value as the index http://judy.sourceforge.net/doc/JudySL_3x.htm
@ -110,13 +92,17 @@ redge * rnode_find_edge(rnode * n, char * pat) {
/*
 * Prepare node n for matching and recurse into the child of each of its edges.
 *
 * When rnode_has_slug_edges(n) reports true, the node's edge patterns are
 * compiled; otherwise combined_pattern is set to NULL so the node is matched
 * as plain text.
 *
 * NOTE(review): this span contains both the has_slug_edges /
 * rnode_combine_patterns() and the use_slug / rnode_compile_patterns()
 * variants of the same check, and its braces do not balance. They look like
 * the before and after sides of one change -- confirm which lines are live.
 */
void rnode_compile(rnode *n)
{
bool has_slug_edges = rnode_has_slug_edges(n);
if ( has_slug_edges ) {
rnode_combine_patterns(n);
bool use_slug = rnode_has_slug_edges(n);
if ( use_slug ) {
rnode_compile_patterns(n);
} else {
// use normal text matching...
n->combined_pattern = NULL;
}
/* recurse so every node below n is prepared as well */
for (int i = 0 ; i < n->edge_len ; i++ ) {
rnode_compile(n->edges[i]->child);
}
}
@ -124,7 +110,7 @@ void rnode_compile(rnode *n)
* This function combines ['/foo', '/bar', '/{slug}'] into (/foo)|(/bar)|/([^/]+)
*
*/
void rnode_combine_patterns(rnode * n) {
void rnode_compile_patterns(rnode * n) {
char * cpat;
char * p;
@ -155,6 +141,88 @@ void rnode_combine_patterns(rnode * n) {
}
n->combined_pattern = cpat;
n->combined_pattern_len = p - cpat;
const char *error;
int erroffset;
// n->pcre_pattern;
n->pcre_pattern = pcre_compile(
n->combined_pattern, /* the pattern */
0, /* default options */
&error, /* for error message */
&erroffset, /* for error offset */
NULL); /* use default character tables */
if (n->pcre_pattern == NULL)
{
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
return;
}
}
/*
 * Match path against the compiled regexp of node n and return the child node
 * of the edge whose capture group matched.
 *
 * n         node whose combined_pattern / pcre_pattern were prepared by
 *           rnode_compile_patterns()
 * path      subject string; only the first path_len bytes are examined
 * path_len  number of bytes of path to match
 *
 * Returns the child of the first edge whose capture group matched a non-empty
 * substring, or NULL when n has no compiled pattern, pcre_exec() reports no
 * match or an error, or no group captured anything.
 *
 * Assumes the combined pattern holds exactly one capture group per edge, in
 * edge order, so group i belongs to edges[i - 1].
 * NOTE(review): a slug that carries its own parenthesised regexp would break
 * that numbering -- confirm against the pattern builder.
 */
rnode * rnode_match(rnode * n, char * path, int path_len) {
    if (n == NULL || path == NULL) {
        return NULL;
    }
    if (!n->combined_pattern || !n->pcre_pattern) {
        return NULL;
    }

    /*
     * pcre_exec() wants the vector size to be a multiple of three: the first
     * two thirds receive the (start, end) pairs, the last third is workspace.
     * One triple for the whole match plus one per edge.
     */
    int ovector_count = (n->edge_len + 1) * 3;
    int ovector[ovector_count];

    /* path is length-delimited, so never print it with a bare %s */
    printf("pcre matching /%s/ on %.*s\n", n->combined_pattern, path_len, path);

    // use PCRE for now
    int rc = pcre_exec(
            n->pcre_pattern,   /* the compiled pattern */
            NULL,              /* no extra data - we didn't study the pattern */
            path,              /* the subject string */
            path_len,          /* the length of the subject */
            0,                 /* start at offset 0 in the subject */
            0,                 /* default options */
            ovector,           /* output vector for substring information */
            ovector_count);    /* number of elements in the output vector */

    printf("rc: %d\n", rc );
    if (rc < 0) {
        switch (rc) {
            case PCRE_ERROR_NOMATCH:
                printf("No match\n");
                break;
            default:
                printf("Matching error %d\n", rc);
                break;
        }
        // does not match all edges, return NULL;
        return NULL;
    }

    /*
     * rc == 0 means the match succeeded but the vector was too small for every
     * group; the pairs that did fit are still valid.
     */
    int pair_count = (rc == 0) ? ovector_count / 3 : rc;

    for (int i = 1; i < pair_count && i <= n->edge_len; i++) {
        int start = ovector[2 * i];
        int end = ovector[2 * i + 1];

        /* groups that did not take part in the match are reported as -1 */
        if (start < 0) {
            continue;
        }

        int substring_length = end - start;
        printf("%2d: %.*s\n", i, substring_length, path + start);
        if (substring_length > 0) {
            /* group numbers are 1-based, the edge array is 0-based */
            return n->edges[i - 1]->child;
        }
    }
    return NULL;
}
/*
 * Find the first edge of n whose pattern is a prefix of str.
 *
 * n        node whose edges are searched, in index order
 * str      candidate string; only the first str_len bytes are examined
 * str_len  number of valid bytes in str
 *
 * Returns the matching edge, or NULL when n or str is NULL or no edge
 * pattern matches.
 *
 * The comparison is bounded by both pattern_len and str_len. The previous
 * loop compared every pattern byte against str[0] only and had no length
 * bound, so it could accept wrong edges and read past the pattern.
 */
redge * rnode_find_edge_str(rnode * n, char * str, int str_len) {
    if (n == NULL || str == NULL || str_len < 0) {
        return NULL;
    }

    for (int i = 0; i < n->edge_len; i++) {
        redge *e = n->edges[i];

        /* an edge longer than the input can never be a prefix of it */
        if (e->pattern_len > str_len) {
            continue;
        }
        if (memcmp(e->pattern, str, (size_t) e->pattern_len) == 0) {
            return e;
        }
    }
    return NULL;
}
@ -236,7 +304,7 @@ rnode * rnode_insert_routel(rnode *tree, char *route, int route_len)
// not found, we should just insert a whole new edge
rnode * child = rnode_create(3);
rnode_add_child(n, strndup(route, route_len) , child);
printf("edge not found, insert one: %s\n", route);
// printf("edge not found, insert one: %s\n", route);
n = child;
return n;
@ -352,18 +420,28 @@ void redge_free(redge * e) {
}
/*
 * Print the edges of node n to stdout, descending into child nodes.
 *
 * level is handed to print_indent() for each edge line and is increased by
 * one for every recursive call on a child.
 *
 * NOTE(review): this span contains two variants of the per-edge output (one
 * recursing directly when e->child is set, one printing the slug first and
 * recursing only when e->child->edges is set) and its braces do not balance.
 * They look like the before and after sides of one change -- confirm which
 * lines are live.
 */
void rnode_dump(rnode * n, int level) {
/* nodes without edges print nothing */
if ( n->edge_len ) {
printf(" => \n");
if ( n->combined_pattern ) {
printf(" regexp: %s", n->combined_pattern);
}
printf("\n");
for ( int i = 0 ; i < n->edge_len ; i++ ) {
redge * e = n->edges[i];
print_indent(level);
printf(" |-\"%s\"", e->pattern);
if ( e->child ) {
rnode_dump( e->child, level + 1);
printf("\n");
} else {
if (e->has_slug) {
printf(" slug:");
/* NOTE(review): if compile_slug() returns allocated memory it is never released here -- confirm */
printf("%s", compile_slug(e->pattern, e->pattern_len) );
}
if ( e->child && e->child->edges ) {
rnode_dump( e->child, level + 1);
}
printf("\n");
}
}
}

View file

@ -40,25 +40,37 @@ START_TEST (test_rnode_find_edge)
END_TEST
START_TEST (test_combine_patterns)
START_TEST (test_compile)
{
token_array *t;
rnode * n;
n = rnode_create(10);
t = split_route_pattern("/foo", strlen("/foo") );
fail_if( rnode_insert_tokens(n , t) == NULL );
t = split_route_pattern("/bar", strlen("/bar") );
fail_if( rnode_insert_tokens(n , t) == NULL );
rnode_insert_routel(n, "/zoo", strlen("/zoo") );
rnode_insert_routel(n, "/foo", strlen("/foo") );
rnode_insert_routel(n, "/bar", strlen("/bar") );
rnode_compile(n);
fail_if( n->combined_pattern );
fail_if( NULL == rnode_find_edge_str(n, "/", strlen("/") ) );
t = split_route_pattern("/zoo", strlen("/zoo") );
fail_if( rnode_insert_tokens(n , t) == NULL );
rnode_insert_routel(n, "/{id}", strlen("/{id}") );
rnode_compile(n);
rnode_dump(n, 0);
fail_if(n->edges[0]->child->combined_pattern == NULL);
rnode_combine_patterns(n);
redge *e = rnode_find_edge_str(n, "/", strlen("/") );
fail_if( NULL == e );
// printf("%s\n", n->combined_pattern);
/*
printf( "%s\n", e->pattern );
printf( "%s\n", e->child->combined_pattern );
printf( "%s\n", n->edges[0]->child->combined_pattern);
printf( "%s\n", n->combined_pattern );
*/
rnode *m = rnode_match( e->child , "foo", strlen("foo") );
fail_if( NULL == m );
}
END_TEST
@ -111,53 +123,33 @@ START_TEST (test_rnode_insert_routel)
{
rnode * n = rnode_create(10);
printf("Inserting /foo/bar\n");
// printf("Inserting /foo/bar\n");
rnode_insert_routel(n, "/foo/bar", strlen("/foo/bar") );
rnode_dump(n, 0);
// rnode_dump(n, 0);
printf("Inserting /foo/zoo\n");
// printf("Inserting /foo/zoo\n");
rnode_insert_routel(n, "/foo/zoo", strlen("/foo/zoo") );
rnode_dump(n, 0);
// rnode_dump(n, 0);
printf("Inserting /f/id\n");
// printf("Inserting /f/id\n");
rnode_insert_routel(n, "/f/id", strlen("/f/id") );
rnode_dump(n, 0);
// rnode_dump(n, 0);
printf("Inserting /post/{id}\n");
// printf("Inserting /post/{id}\n");
rnode_insert_routel(n, "/post/{id}", strlen("/post/{id}") );
rnode_dump(n, 0);
// rnode_dump(n, 0);
printf("Inserting /post/{handle}\n");
// printf("Inserting /post/{handle}\n");
rnode_insert_routel(n, "/post/{handle}", strlen("/post/{handle}") );
rnode_dump(n, 0);
// rnode_dump(n, 0);
printf("Inserting /post/{handle}-{id}\n");
// printf("Inserting /post/{handle}-{id}\n");
rnode_insert_routel(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}") );
rnode_combine_patterns(n);
rnode_dump(n, 0);
rnode_compile(n);
// rnode_dump(n, 0);
/*
fail_if(n == NULL, "rnode tree");
t = split_route_pattern("/foo/bar", strlen("/foo/bar") );
fail_if( rnode_insert_tokens(n , t) == NULL );
t = split_route_pattern("/foo/zoo", strlen("/foo/zoo") );
fail_if( rnode_insert_tokens(n , t) == NULL );
t = split_route_pattern("/a/bb", strlen("/a/bb") );
fail_if( rnode_insert_tokens(n , t) == NULL );
t = split_route_pattern("/a/bb/cc", strlen("/a/bb/cc") );
fail_if( rnode_insert_tokens(n , t) == NULL );
t = split_route_pattern("/a/jj/kk", strlen("/a/jj/kk") );
fail_if( rnode_insert_tokens(n , t) == NULL );
*/
/*
fail_if( rnode_lookup(n , "/a/jj/kk" , strlen("/a/jj/kk") ) == NULL );
fail_if( rnode_lookup(n , "/a/jj" , strlen("/a/jj") ) != NULL );
@ -246,7 +238,7 @@ Suite* r3_suite (void) {
tcase_add_test(tcase, test_rnode_find_edge);
tcase_add_test(tcase, test_rnode_insert_routel);
tcase_add_test(tcase, test_compile_slug);
tcase_add_test(tcase, test_combine_patterns);
tcase_add_test(tcase, test_compile);
suite_add_tcase(suite, tcase);