pcre stuff
This commit is contained in:
parent
fe21914e00
commit
57dce698db
4 changed files with 174 additions and 72 deletions
|
@ -12,6 +12,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
|
||||||
#include "token.h"
|
#include "token.h"
|
||||||
|
|
||||||
|
@ -21,6 +22,27 @@ typedef struct _redge redge;
|
||||||
typedef struct _rnode rnode;
|
typedef struct _rnode rnode;
|
||||||
|
|
||||||
|
|
||||||
|
struct _rnode {
|
||||||
|
redge ** edges;
|
||||||
|
int edge_len;
|
||||||
|
int edge_cap;
|
||||||
|
|
||||||
|
/* the combined regexp pattern string from pattern_tokens */
|
||||||
|
char * combined_pattern;
|
||||||
|
int combined_pattern_len;
|
||||||
|
pcre * pcre_pattern;
|
||||||
|
|
||||||
|
|
||||||
|
int endpoint;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct _redge {
|
||||||
|
char * pattern;
|
||||||
|
int pattern_len;
|
||||||
|
bool has_slug;
|
||||||
|
rnode * child;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
rnode * rnode_create(int cap);
|
rnode * rnode_create(int cap);
|
||||||
|
@ -43,7 +65,14 @@ rnode * rnode_insert_routel(rnode *tree, char *route, int route_len);
|
||||||
|
|
||||||
void rnode_dump(rnode * n, int level);
|
void rnode_dump(rnode * n, int level);
|
||||||
|
|
||||||
void rnode_combine_patterns(rnode * n);
|
redge * rnode_find_edge_str(rnode * n, char * str, int str_len);
|
||||||
|
|
||||||
|
|
||||||
|
void rnode_compile(rnode *n);
|
||||||
|
|
||||||
|
void rnode_compile_patterns(rnode * n);
|
||||||
|
|
||||||
|
rnode * rnode_match(rnode * n, char * path, int path_len);
|
||||||
|
|
||||||
bool rnode_has_slug_edges(rnode *n);
|
bool rnode_has_slug_edges(rnode *n);
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,11 @@ include_directories("${PROJECT_SOURCE_DIR}/include")
|
||||||
|
|
||||||
set(libr3_SRCS node.c str.c list.c token.c)
|
set(libr3_SRCS node.c str.c list.c token.c)
|
||||||
set(CMAKE_CFLAGS "-Wall -pipe -g3 -funroll-loops")
|
set(CMAKE_CFLAGS "-Wall -pipe -g3 -funroll-loops")
|
||||||
|
set(LIBS ${LIBS} ${PCRE_LIBRARIES} ${Judy_LIBRARIES} ${Jemalloc_LIBRARIES} r3)
|
||||||
|
|
||||||
# add_library(r3-static STATIC ${libr3_SRCS})
|
# add_library(r3-static STATIC ${libr3_SRCS})
|
||||||
add_library(r3 SHARED ${libr3_SRCS})
|
add_library(r3 SHARED ${libr3_SRCS})
|
||||||
|
target_link_libraries(r3 ${LIBS})
|
||||||
|
|
||||||
# target_link_libraries(r3 cblas)
|
# target_link_libraries(r3 cblas)
|
||||||
# install(FILES ${libswiftnav_HEADERS} DESTINATION include/libswiftnav)
|
# install(FILES ${libswiftnav_HEADERS} DESTINATION include/libswiftnav)
|
||||||
|
|
134
src/node.c
134
src/node.c
|
@ -16,24 +16,6 @@
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
#include "token.h"
|
#include "token.h"
|
||||||
|
|
||||||
struct _rnode {
|
|
||||||
redge ** edges;
|
|
||||||
int edge_len;
|
|
||||||
int edge_cap;
|
|
||||||
|
|
||||||
/* the combined regexp pattern string from pattern_tokens */
|
|
||||||
char * combined_pattern;
|
|
||||||
int combined_pattern_len;
|
|
||||||
|
|
||||||
int endpoint;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct _redge {
|
|
||||||
char * pattern;
|
|
||||||
int pattern_len;
|
|
||||||
bool has_slug;
|
|
||||||
rnode * child;
|
|
||||||
};
|
|
||||||
|
|
||||||
// String value as the index http://judy.sourceforge.net/doc/JudySL_3x.htm
|
// String value as the index http://judy.sourceforge.net/doc/JudySL_3x.htm
|
||||||
|
|
||||||
|
@ -110,13 +92,17 @@ redge * rnode_find_edge(rnode * n, char * pat) {
|
||||||
|
|
||||||
void rnode_compile(rnode *n)
|
void rnode_compile(rnode *n)
|
||||||
{
|
{
|
||||||
bool has_slug_edges = rnode_has_slug_edges(n);
|
bool use_slug = rnode_has_slug_edges(n);
|
||||||
if ( has_slug_edges ) {
|
if ( use_slug ) {
|
||||||
rnode_combine_patterns(n);
|
rnode_compile_patterns(n);
|
||||||
} else {
|
} else {
|
||||||
// use normal text matching...
|
// use normal text matching...
|
||||||
n->combined_pattern = NULL;
|
n->combined_pattern = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0 ; i < n->edge_len ; i++ ) {
|
||||||
|
rnode_compile(n->edges[i]->child);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -124,7 +110,7 @@ void rnode_compile(rnode *n)
|
||||||
* This function combines ['/foo', '/bar', '/{slug}'] into (/foo)|(/bar)|/([^/]+)}
|
* This function combines ['/foo', '/bar', '/{slug}'] into (/foo)|(/bar)|/([^/]+)}
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void rnode_combine_patterns(rnode * n) {
|
void rnode_compile_patterns(rnode * n) {
|
||||||
char * cpat;
|
char * cpat;
|
||||||
char * p;
|
char * p;
|
||||||
|
|
||||||
|
@ -155,6 +141,88 @@ void rnode_combine_patterns(rnode * n) {
|
||||||
}
|
}
|
||||||
n->combined_pattern = cpat;
|
n->combined_pattern = cpat;
|
||||||
n->combined_pattern_len = p - cpat;
|
n->combined_pattern_len = p - cpat;
|
||||||
|
|
||||||
|
|
||||||
|
const char *error;
|
||||||
|
int erroffset;
|
||||||
|
|
||||||
|
// n->pcre_pattern;
|
||||||
|
n->pcre_pattern = pcre_compile(
|
||||||
|
n->combined_pattern, /* the pattern */
|
||||||
|
0, /* default options */
|
||||||
|
&error, /* for error message */
|
||||||
|
&erroffset, /* for error offset */
|
||||||
|
NULL); /* use default character tables */
|
||||||
|
if (n->pcre_pattern == NULL)
|
||||||
|
{
|
||||||
|
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
rnode * rnode_match(rnode * n, char * path, int path_len) {
|
||||||
|
int ovector_count = (n->edge_len + 1) * 2;
|
||||||
|
int ovector[ovector_count];
|
||||||
|
|
||||||
|
if (n->combined_pattern && n->pcre_pattern) {
|
||||||
|
printf("pcre matching /%s/ on %s\n", n->combined_pattern, path);
|
||||||
|
// use PCRE for now
|
||||||
|
int rc;
|
||||||
|
rc = pcre_exec(
|
||||||
|
n->pcre_pattern, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
path, /* the subject string */
|
||||||
|
path_len, /* the length of the subject */
|
||||||
|
0, /* start at offset 0 in the subject */
|
||||||
|
0, /* default options */
|
||||||
|
ovector, /* output vector for substring information */
|
||||||
|
ovector_count); /* number of elements in the output vector */
|
||||||
|
|
||||||
|
printf("rc: %d\n", rc );
|
||||||
|
if (rc < 0) {
|
||||||
|
switch(rc)
|
||||||
|
{
|
||||||
|
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
|
||||||
|
/*
|
||||||
|
Handle other special cases if you like
|
||||||
|
*/
|
||||||
|
default: printf("Matching error %d\n", rc); break;
|
||||||
|
}
|
||||||
|
// does not match all edges, return NULL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = 1; i < rc; i++)
|
||||||
|
{
|
||||||
|
char *substring_start = path + ovector[2*i];
|
||||||
|
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||||
|
printf("%2d: %.*s\n", i, substring_length, substring_start);
|
||||||
|
if ( substring_length > 0) {
|
||||||
|
return n->edges[i]->child;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
redge * rnode_find_edge_str(rnode * n, char * str, int str_len) {
|
||||||
|
redge *e;
|
||||||
|
for ( int i = 0 ; i < n->edge_len ; i++ ) {
|
||||||
|
e = n->edges[i];
|
||||||
|
char *p = e->pattern;
|
||||||
|
while ( *p == *str ) {
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
if ( p - e->pattern == e->pattern_len ) {
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,7 +304,7 @@ rnode * rnode_insert_routel(rnode *tree, char *route, int route_len)
|
||||||
// not found, we should just insert a whole new edge
|
// not found, we should just insert a whole new edge
|
||||||
rnode * child = rnode_create(3);
|
rnode * child = rnode_create(3);
|
||||||
rnode_add_child(n, strndup(route, route_len) , child);
|
rnode_add_child(n, strndup(route, route_len) , child);
|
||||||
printf("edge not found, insert one: %s\n", route);
|
// printf("edge not found, insert one: %s\n", route);
|
||||||
|
|
||||||
n = child;
|
n = child;
|
||||||
return n;
|
return n;
|
||||||
|
@ -352,18 +420,28 @@ void redge_free(redge * e) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void rnode_dump(rnode * n, int level) {
|
void rnode_dump(rnode * n, int level) {
|
||||||
if ( n->edge_len ) {
|
if ( n->edge_len ) {
|
||||||
printf(" => \n");
|
if ( n->combined_pattern ) {
|
||||||
|
printf(" regexp: %s", n->combined_pattern);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
for ( int i = 0 ; i < n->edge_len ; i++ ) {
|
for ( int i = 0 ; i < n->edge_len ; i++ ) {
|
||||||
redge * e = n->edges[i];
|
redge * e = n->edges[i];
|
||||||
print_indent(level);
|
print_indent(level);
|
||||||
printf(" |-\"%s\"", e->pattern);
|
printf(" |-\"%s\"", e->pattern);
|
||||||
if ( e->child ) {
|
|
||||||
rnode_dump( e->child, level + 1);
|
if (e->has_slug) {
|
||||||
printf("\n");
|
printf(" slug:");
|
||||||
} else {
|
printf("%s", compile_slug(e->pattern, e->pattern_len) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( e->child && e->child->edges ) {
|
||||||
|
rnode_dump( e->child, level + 1);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,25 +40,37 @@ START_TEST (test_rnode_find_edge)
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
START_TEST (test_combine_patterns)
|
START_TEST (test_compile)
|
||||||
{
|
{
|
||||||
token_array *t;
|
token_array *t;
|
||||||
rnode * n;
|
rnode * n;
|
||||||
n = rnode_create(10);
|
n = rnode_create(10);
|
||||||
|
|
||||||
|
|
||||||
t = split_route_pattern("/foo", strlen("/foo") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
|
|
||||||
t = split_route_pattern("/bar", strlen("/bar") );
|
rnode_insert_routel(n, "/zoo", strlen("/zoo") );
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
rnode_insert_routel(n, "/foo", strlen("/foo") );
|
||||||
|
rnode_insert_routel(n, "/bar", strlen("/bar") );
|
||||||
|
rnode_compile(n);
|
||||||
|
fail_if( n->combined_pattern );
|
||||||
|
fail_if( NULL == rnode_find_edge_str(n, "/", strlen("/") ) );
|
||||||
|
|
||||||
t = split_route_pattern("/zoo", strlen("/zoo") );
|
rnode_insert_routel(n, "/{id}", strlen("/{id}") );
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
rnode_compile(n);
|
||||||
|
rnode_dump(n, 0);
|
||||||
|
fail_if(n->edges[0]->child->combined_pattern == NULL);
|
||||||
|
|
||||||
rnode_combine_patterns(n);
|
redge *e = rnode_find_edge_str(n, "/", strlen("/") );
|
||||||
|
fail_if( NULL == e );
|
||||||
|
|
||||||
// printf("%s\n", n->combined_pattern);
|
/*
|
||||||
|
printf( "%s\n", e->pattern );
|
||||||
|
printf( "%s\n", e->child->combined_pattern );
|
||||||
|
printf( "%s\n", n->edges[0]->child->combined_pattern);
|
||||||
|
printf( "%s\n", n->combined_pattern );
|
||||||
|
*/
|
||||||
|
rnode *m = rnode_match( e->child , "foo", strlen("foo") );
|
||||||
|
fail_if( NULL == m );
|
||||||
}
|
}
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
@ -111,53 +123,33 @@ START_TEST (test_rnode_insert_routel)
|
||||||
{
|
{
|
||||||
rnode * n = rnode_create(10);
|
rnode * n = rnode_create(10);
|
||||||
|
|
||||||
printf("Inserting /foo/bar\n");
|
// printf("Inserting /foo/bar\n");
|
||||||
rnode_insert_routel(n, "/foo/bar", strlen("/foo/bar") );
|
rnode_insert_routel(n, "/foo/bar", strlen("/foo/bar") );
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
printf("Inserting /foo/zoo\n");
|
// printf("Inserting /foo/zoo\n");
|
||||||
rnode_insert_routel(n, "/foo/zoo", strlen("/foo/zoo") );
|
rnode_insert_routel(n, "/foo/zoo", strlen("/foo/zoo") );
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
printf("Inserting /f/id\n");
|
// printf("Inserting /f/id\n");
|
||||||
rnode_insert_routel(n, "/f/id", strlen("/f/id") );
|
rnode_insert_routel(n, "/f/id", strlen("/f/id") );
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
printf("Inserting /post/{id}\n");
|
// printf("Inserting /post/{id}\n");
|
||||||
rnode_insert_routel(n, "/post/{id}", strlen("/post/{id}") );
|
rnode_insert_routel(n, "/post/{id}", strlen("/post/{id}") );
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
printf("Inserting /post/{handle}\n");
|
// printf("Inserting /post/{handle}\n");
|
||||||
rnode_insert_routel(n, "/post/{handle}", strlen("/post/{handle}") );
|
rnode_insert_routel(n, "/post/{handle}", strlen("/post/{handle}") );
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
printf("Inserting /post/{handle}-{id}\n");
|
// printf("Inserting /post/{handle}-{id}\n");
|
||||||
rnode_insert_routel(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}") );
|
rnode_insert_routel(n, "/post/{handle}-{id}", strlen("/post/{handle}-{id}") );
|
||||||
rnode_combine_patterns(n);
|
rnode_compile(n);
|
||||||
rnode_dump(n, 0);
|
// rnode_dump(n, 0);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
fail_if(n == NULL, "rnode tree");
|
|
||||||
|
|
||||||
t = split_route_pattern("/foo/bar", strlen("/foo/bar") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
|
|
||||||
t = split_route_pattern("/foo/zoo", strlen("/foo/zoo") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
|
|
||||||
t = split_route_pattern("/a/bb", strlen("/a/bb") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
|
|
||||||
t = split_route_pattern("/a/bb/cc", strlen("/a/bb/cc") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
|
|
||||||
t = split_route_pattern("/a/jj/kk", strlen("/a/jj/kk") );
|
|
||||||
fail_if( rnode_insert_tokens(n , t) == NULL );
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
fail_if( rnode_lookup(n , "/a/jj/kk" , strlen("/a/jj/kk") ) == NULL );
|
fail_if( rnode_lookup(n , "/a/jj/kk" , strlen("/a/jj/kk") ) == NULL );
|
||||||
fail_if( rnode_lookup(n , "/a/jj" , strlen("/a/jj") ) != NULL );
|
fail_if( rnode_lookup(n , "/a/jj" , strlen("/a/jj") ) != NULL );
|
||||||
|
@ -246,7 +238,7 @@ Suite* r3_suite (void) {
|
||||||
tcase_add_test(tcase, test_rnode_find_edge);
|
tcase_add_test(tcase, test_rnode_find_edge);
|
||||||
tcase_add_test(tcase, test_rnode_insert_routel);
|
tcase_add_test(tcase, test_rnode_insert_routel);
|
||||||
tcase_add_test(tcase, test_compile_slug);
|
tcase_add_test(tcase, test_compile_slug);
|
||||||
tcase_add_test(tcase, test_combine_patterns);
|
tcase_add_test(tcase, test_compile);
|
||||||
|
|
||||||
suite_add_tcase(suite, tcase);
|
suite_add_tcase(suite, tcase);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue