r3/src/node.c

483 lines
13 KiB
C
Raw Normal View History

2014-05-14 22:08:42 -04:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
2014-05-14 22:08:42 -04:00
// Jemalloc memory management
#include <jemalloc/jemalloc.h>
2014-05-15 01:39:50 -04:00
// PCRE
#include <pcre.h>
2014-05-14 22:08:42 -04:00
// Judy array
#include <Judy.h>
2014-05-16 08:22:25 -04:00
#include "r3_define.h"
#include "r3_str.h"
#include "r3.h"
#include "str_array.h"
2014-05-15 01:39:50 -04:00
2014-05-15 06:02:10 -04:00
2014-05-14 22:08:42 -04:00
// String value as the index http://judy.sourceforge.net/doc/JudySL_3x.htm
2014-05-15 01:39:50 -04:00
2014-05-15 06:26:41 -04:00
/**
2014-05-16 03:29:25 -04:00
* Create a node object
2014-05-15 06:26:41 -04:00
*/
2014-05-16 06:57:36 -04:00
node * r3_tree_create(int cap) {
2014-05-16 03:29:25 -04:00
node * n = (node*) malloc( sizeof(node) );
2014-05-15 01:39:50 -04:00
2014-05-16 03:29:25 -04:00
n->edges = (edge**) malloc( sizeof(edge*) * 10 );
n->edge_len = 0;
n->edge_cap = cap;
2014-05-15 06:26:41 -04:00
n->endpoint = 0;
2014-05-15 09:17:30 -04:00
n->combined_pattern = NULL;
2014-05-16 08:51:30 -04:00
n->pcre_pattern = NULL;
n->pcre_extra = NULL;
n->ov_cnt = 0;
n->ov = NULL;
2014-05-15 01:39:50 -04:00
return n;
}
2014-05-16 06:57:36 -04:00
/**
 * Release a node: every non-NULL edge is passed to r3_edge_free(), then the
 * combined pattern, the compiled PCRE data, the output vector, the edge
 * array and the node itself are freed.
 *
 * Passing NULL is a no-op.
 */
void r3_tree_free(node * tree) {
    if (!tree) {
        return;
    }

    for (int i = 0 ; i < tree->edge_len ; i++ ) {
        if (tree->edges[i]) {
            r3_edge_free(tree->edges[ i ]);
        }
    }

    free(tree->combined_pattern);

    /* Objects returned by pcre_compile()/pcre_study() must go back through
     * PCRE's own deallocators rather than plain free(). */
    if (tree->pcre_pattern) {
        pcre_free(tree->pcre_pattern);
    }
    if (tree->pcre_extra) {
        pcre_free_study(tree->pcre_extra);
    }

    free(tree->ov);
    free(tree->edges);
    free(tree);
}
2014-05-15 06:02:10 -04:00
2014-05-15 01:39:50 -04:00
/* parent node, edge pattern, child */
2014-05-16 06:57:36 -04:00
edge * r3_tree_add_child(node * n, char * pat , node *child) {
2014-05-15 01:39:50 -04:00
// find the same sub-pattern, if it does not exist, create one
2014-05-16 03:29:25 -04:00
edge * e;
2014-05-15 06:02:10 -04:00
2014-05-16 06:57:36 -04:00
e = r3_node_find_edge(n, pat);
2014-05-15 06:02:10 -04:00
if (e) {
2014-05-15 10:57:13 -04:00
return e;
2014-05-15 01:39:50 -04:00
}
2014-05-16 06:57:36 -04:00
e = r3_edge_create( pat, strlen(pat), child);
r3_tree_append_edge(n, e);
// str_array_append(n->edge_patterns, pat);
// assert( str_array_len(n->edge_patterns) == n->edge_len );
2014-05-15 10:57:13 -04:00
return e;
2014-05-15 01:39:50 -04:00
}
2014-05-15 06:02:10 -04:00
2014-05-16 06:57:36 -04:00
/**
 * Append edge `e` to node `n`, growing the edge array when it is full.
 *
 * The array holds edge pointers, so it is sized by the pointer type. If the
 * array cannot be allocated or grown, the node is left unchanged and the
 * edge is NOT appended (the function has no way to report the failure).
 */
void r3_tree_append_edge(node *n, edge *e) {
    if (!n->edges) {
        edge **fresh = malloc(sizeof *fresh * 3);
        if (!fresh) {
            return;
        }
        n->edges = fresh;
        n->edge_cap = 3;
    }

    if (n->edge_len >= n->edge_cap) {
        /* Doubling a capacity of 0 would stay at 0, so restart from 3. */
        int grown_cap = n->edge_cap > 0 ? n->edge_cap * 2 : 3;
        /* Keep the old pointer until realloc is known to have succeeded. */
        edge **grown = realloc(n->edges, sizeof *grown * (size_t) grown_cap);
        if (!grown) {
            return;
        }
        n->edges = grown;
        n->edge_cap = grown_cap;
    }

    n->edges[ n->edge_len++ ] = e;
}
2014-05-16 06:57:36 -04:00
/**
 * Look up the edge of `n` whose pattern compares equal (strcmp) to `pat`.
 *
 * Returns the first such edge, or NULL when none matches.
 */
edge * r3_node_find_edge(node * n, char * pat) {
    int idx = 0;
    while (idx < n->edge_len) {
        edge * candidate = n->edges[idx];
        if (!strcmp(candidate->pattern, pat)) {
            return candidate;
        }
        idx++;
    }
    return NULL;
}
2014-05-16 06:57:36 -04:00
/**
 * Compile node `n` and then, recursively, the child of each of its edges.
 *
 * A node with at least one slug edge gets its patterns compiled via
 * r3_tree_compile_patterns(); otherwise its combined pattern is reset to
 * NULL so that plain text matching is used.
 */
void r3_tree_compile(node *n)
{
    if ( r3_node_has_slug_edges(n) ) {
        r3_tree_compile_patterns(n);
    } else {
        // plain text matching: no combined pattern for this node
        n->combined_pattern = NULL;
    }

    int idx = 0;
    while (idx < n->edge_len) {
        r3_tree_compile(n->edges[idx]->child);
        idx++;
    }
}
2014-05-15 09:17:30 -04:00
/**
* This function combines ['/foo', '/bar', '/{slug}'] into (/foo)|(/bar)|/([^/]+)}
*
*/
2014-05-16 06:57:36 -04:00
void r3_tree_compile_patterns(node * n) {
2014-05-15 09:17:30 -04:00
char * cpat;
char * p;
2014-05-16 00:33:59 -04:00
cpat = calloc(sizeof(char),128);
2014-05-15 09:17:30 -04:00
if (cpat==NULL)
return;
p = cpat;
2014-05-16 06:57:36 -04:00
strncat(p, "^", 1);
p++;
2014-05-16 06:03:52 -04:00
2014-05-16 03:29:25 -04:00
edge *e = NULL;
for ( int i = 0 ; i < n->edge_len ; i++ ) {
2014-05-15 10:57:13 -04:00
e = n->edges[i];
2014-05-16 00:33:59 -04:00
if ( e->has_slug ) {
char * slug_pat = compile_slug(e->pattern, e->pattern_len);
strcat(p, slug_pat);
} else {
strncat(p++,"(", 1);
2014-05-15 09:17:30 -04:00
2014-05-16 00:33:59 -04:00
strncat(p, e->pattern, e->pattern_len);
p += e->pattern_len;
2014-05-15 09:17:30 -04:00
2014-05-16 00:33:59 -04:00
strncat(p++,")", 1);
}
2014-05-15 09:17:30 -04:00
if ( i + 1 < n->edge_len ) {
2014-05-15 09:17:30 -04:00
strncat(p++,"|",1);
}
2014-05-15 08:38:07 -04:00
}
2014-05-16 08:51:30 -04:00
n->ov_cnt = (1 + n->edge_len) * 3;
n->ov = (int*) calloc(sizeof(int), n->ov_cnt);
2014-05-15 09:17:30 -04:00
n->combined_pattern = cpat;
n->combined_pattern_len = p - cpat;
2014-05-16 02:05:51 -04:00
const char *error;
int erroffset;
2014-05-16 06:03:52 -04:00
unsigned int option_bits;
if (n->pcre_pattern)
free(n->pcre_pattern);
if (n->pcre_extra)
free(n->pcre_extra);
2014-05-16 02:05:51 -04:00
// n->pcre_pattern;
n->pcre_pattern = pcre_compile(
n->combined_pattern, /* the pattern */
0, /* default options */
&error, /* for error message */
&erroffset, /* for error offset */
NULL); /* use default character tables */
2014-05-16 03:29:25 -04:00
if (n->pcre_pattern == NULL) {
2014-05-16 02:05:51 -04:00
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
return;
}
2014-05-16 03:29:25 -04:00
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &error);
if (n->pcre_extra == NULL) {
printf("PCRE study failed at offset %s\n", error);
return;
}
2014-05-16 02:05:51 -04:00
}
2014-05-16 07:12:01 -04:00
/**
 * Allocate a match_entry describing `path`.
 *
 * The entry stores the `path` pointer and `path_len` as given (the string is
 * not copied), gets a vars array from str_array_create(3), and starts with a
 * NULL route_ptr.
 *
 * Returns the new entry, or NULL if the entry itself cannot be allocated.
 */
match_entry * match_entry_create(char * path, int path_len) {
    match_entry * created = malloc(sizeof(match_entry));
    if (created == NULL) {
        return NULL;
    }

    created->vars = str_array_create(3);
    created->route_ptr = NULL;
    created->path_len = path_len;
    created->path = path;
    return created;
}
/**
 * Release a match_entry: its vars array is handed to str_array_free() and
 * the entry itself is freed. The path pointer stored in the entry is not
 * freed here.
 *
 * Passing NULL is a no-op, so the result of a failed match_entry_create()
 * can be passed straight through.
 */
void match_entry_free(match_entry * entry) {
    if (!entry) {
        return;
    }
    str_array_free(entry->vars);
    free(entry);
}
2014-05-16 02:05:51 -04:00
2014-05-16 02:24:00 -04:00
2014-05-16 08:51:30 -04:00
/**
* This function matches the URL path and return the left node
*
* r3_tree_match returns NULL when the path does not match. returns *node when the path matches.
*
* @param node n the root of the tree
* @param char* path the URL path to dispatch
* @param int path_len the length of the URL path.
* @param match_entry* entry match_entry is used for saving the captured dynamic strings from pcre result.
*/
2014-05-16 06:57:36 -04:00
node * r3_tree_match(node * n, char * path, int path_len, match_entry * entry) {
2014-05-16 06:03:52 -04:00
// info("try matching: %s\n", path);
2014-05-16 06:57:36 -04:00
edge *e;
2014-05-16 09:02:02 -04:00
int rc;
int i;
2014-05-16 08:51:30 -04:00
// if the pcre_pattern is found, and the pointer is not NULL, then it's
// pcre pattern node, we use pcre_exec to match the nodes
if (n->pcre_pattern) {
2014-05-16 02:36:48 -04:00
info("pcre matching %s on %s\n", n->combined_pattern, path);
2014-05-16 06:57:36 -04:00
2014-05-16 02:05:51 -04:00
rc = pcre_exec(
n->pcre_pattern, /* the compiled pattern */
2014-05-16 06:57:36 -04:00
// PCRE Study makes this slow
NULL, // n->pcre_extra, /* no extra data - we didn't study the pattern */
2014-05-16 02:05:51 -04:00
path, /* the subject string */
path_len, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
n->ov, /* output vector for substring information */
2014-05-16 08:51:30 -04:00
n->ov_cnt); /* number of elements in the output vector */
2014-05-16 02:05:51 -04:00
2014-05-16 02:36:48 -04:00
info("rc: %d\n", rc );
2014-05-16 02:05:51 -04:00
if (rc < 0) {
switch(rc)
{
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
// does not match all edges, return NULL;
return NULL;
}
for (i = 1; i < rc; i++)
{
char *substring_start = path + n->ov[2*i];
int substring_length = n->ov[2*i+1] - n->ov[2*i];
2014-05-16 02:36:48 -04:00
info("%2d: %.*s\n", i, substring_length, substring_start);
2014-05-16 06:03:52 -04:00
2014-05-16 02:05:51 -04:00
if ( substring_length > 0) {
int restlen = path_len - n->ov[2*i+1]; // fully match to the end
info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i);
2014-05-16 06:03:52 -04:00
e = n->edges[i - 1];
if (entry && e->has_slug) {
2014-05-16 07:12:01 -04:00
// append captured token to entry
str_array_append(entry->vars , strndup(substring_start, substring_length));
2014-05-16 06:03:52 -04:00
}
if (restlen == 0) {
return e->child;
2014-05-16 02:24:00 -04:00
}
2014-05-16 06:57:36 -04:00
return r3_tree_match( e->child, substring_start + substring_length, restlen, entry);
2014-05-16 02:05:51 -04:00
}
}
2014-05-16 02:24:00 -04:00
// does not match
return NULL;
}
2014-05-16 02:05:51 -04:00
2014-05-16 09:02:02 -04:00
if ( (e = r3_node_find_edge_str(n, path, path_len)) != NULL ) {
2014-05-16 06:57:36 -04:00
int restlen = path_len - e->pattern_len;
2014-05-16 09:02:02 -04:00
if(restlen > 0) {
2014-05-16 06:57:36 -04:00
return r3_tree_match(e->child, path + e->pattern_len, restlen, entry);
2014-05-16 02:24:00 -04:00
}
2014-05-16 09:02:02 -04:00
return e->child;
2014-05-16 02:05:51 -04:00
}
return NULL;
}
2014-05-16 06:03:52 -04:00
2014-05-16 06:57:36 -04:00
/**
 * Find the first edge of `n` whose whole pattern is a prefix of `str`.
 *
 * Edges whose pattern is longer than `str_len` are skipped: comparing them
 * would read beyond the `str_len` bytes the caller said are valid and could
 * report a match for input that is shorter than the pattern.
 *
 * Returns the matching edge, or NULL when none matches.
 */
inline edge * r3_node_find_edge_str(node * n, char * str, int str_len) {
    for (int i = 0; i < n->edge_len ; i++ ) {
        info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) );
        if ( node_edge_pattern_len(n,i) > str_len ) {
            continue;
        }
        if ( strncmp( node_edge_pattern(n,i), str, node_edge_pattern_len(n,i) ) == 0 ) {
            return n->edges[i];
        }
    }
    return NULL;
}
2014-05-15 06:26:41 -04:00
2014-05-16 06:57:36 -04:00
/**
 * Look up `path` in `tree` by exact token comparison.
 *
 * The path is split with split_route_pattern() and each token must equal
 * (via r3_node_find_edge) the pattern of an edge on the way down.
 *
 * Returns the node reached when every token matched and that node is marked
 * as an endpoint; NULL otherwise. The token array is released on every path.
 * NOTE(review): assumes split_route_pattern() returns an array the caller
 * owns and that str_array_free() is its destructor -- confirm.
 */
node * r3_tree_lookup(node * tree, char * path, int path_len) {
    str_array * tokens = split_route_pattern(path, path_len);
    if (!tokens) {
        return NULL;
    }

    node * n = tree;
    for ( int i = 0 ; n && i < tokens->len ; i++ ) {
        edge * e = r3_node_find_edge(n, str_array_fetch(tokens, i) );
        n = e ? e->child : NULL;
    }

    /* Tokens are only needed while walking; free them before returning. */
    str_array_free(tokens);

    if (n && n->endpoint) {
        return n;
    }
    return NULL;
}
2014-05-16 06:57:36 -04:00
node * r3_node_create() {
2014-05-16 03:29:25 -04:00
node * n = (node*) malloc( sizeof(node) );
n->edges = NULL;
n->edge_len = 0;
n->edge_cap = 0;
2014-05-16 03:29:25 -04:00
n->endpoint = 0;
n->combined_pattern = NULL;
n->pcre_pattern = NULL;
return n;
}
2014-05-15 06:26:41 -04:00
2014-05-16 06:57:36 -04:00
/**
 * Insert a NUL-terminated route into the tree.
 *
 * Convenience wrapper: measures `route` with strlen() and forwards to
 * r3_tree_insert_pathn(), returning whatever that call returns.
 */
node * r3_tree_insert_path(node *tree, char *route, void * route_ptr)
{
    int measured_len = strlen(route);
    return r3_tree_insert_pathn(tree, route, measured_len, route_ptr);
}
2014-05-16 06:57:36 -04:00
/**
 * Insert the first `route_len` bytes of `route` below `tree` and attach
 * `route_ptr` to the node that ends the route.
 *
 * The first edge sharing a non-empty prefix with the route is selected:
 *  - no shared prefix: a new edge carrying the whole route is added;
 *  - the edge pattern is fully covered: insertion continues in the edge's
 *    child with the remainder, or the child becomes the endpoint when
 *    nothing remains;
 *  - the edge is only partly covered: the edge is branched at the shared
 *    prefix, a new edge for the route's remainder is appended to the
 *    edge's child, and the original edge pattern is truncated.
 * A split position that falls inside a "{...}" slug of the route is moved
 * back to just before the '{'.
 *
 * @return the endpoint node for the route, or NULL on allocation failure or
 *         on the (unexpected) case of a prefix longer than the edge pattern.
 */
node * r3_tree_insert_pathn(node *tree, char *route, int route_len, void * route_ptr)
{
    node * n = tree;
    edge * e = NULL;

    /* length of common prefix */
    int dl = 0;
    for( int i = 0 ; i < n->edge_len ; i++ ) {
        dl = strndiff(route, n->edges[i]->pattern, n->edges[i]->pattern_len);

        // printf("dl: %d %s vs %s\n", dl, route, n->edges[i]->pattern );

        // no common, consider insert a new edge
        if ( dl > 0 ) {
            e = n->edges[i];
            break;
        }
    }

    // branch the edge at correct position (avoid broken slugs)
    char *slug_s = strchr(route, '{');
    char *slug_e = strchr(route, '}');
    if ( slug_s && slug_e ) {
        if ( dl > (slug_s - route) && dl < (slug_e - route) ) {
            // break before '{'
            dl = slug_s - route;
        }
    }

    if ( dl == 0 ) {
        // not found, we should just insert a whole new edge
        node * child = r3_tree_create(3);
        char * pattern = strndup(route, route_len);
        if (!child || !pattern) {
            free(pattern);
            if (child) {
                r3_tree_free(child);
            }
            return NULL;
        }
        r3_tree_add_child(n, pattern , child);
        info("edge not found, insert one: %s\n", route);
        child->route_ptr = route_ptr;
        child->endpoint++;
        return child;
    } else if ( dl == e->pattern_len ) {    // fully-equal to the pattern of the edge

        char * subroute = route + dl;
        int    subroute_len = route_len - dl;

        // there are something more we can insert
        if ( subroute_len > 0 ) {
            return r3_tree_insert_pathn(e->child, subroute, subroute_len, route_ptr);
        } else {
            // no more, make the child an endpoint carrying the route value
            e->child->endpoint++;
            e->child->route_ptr = route_ptr;
            return e->child;
        }

    } else if ( dl < e->pattern_len ) {
        // printf("branch the edge dl: %d\n", dl);

        /* it's partially matched with the pattern,
         * we should split the end point and make a branch here...
         */
        node *c2;
        edge *e2;
        char * s2 = route + dl;
        int s2_len = 0;

        r3_edge_branch(e, dl);

        // here is the new edge from.
        c2 = r3_tree_create(3);
        s2_len = route_len - dl;
        char * s2_copy = strndup(s2, s2_len);
        if (!c2 || !s2_copy) {
            free(s2_copy);
            if (c2) {
                r3_tree_free(c2);
            }
            return NULL;
        }
        e2 = r3_edge_create(s2_copy, s2_len, c2);

        // printf("edge right: %s\n", e2->pattern);
        r3_tree_append_edge(e->child, e2);

        /* Truncate the original edge pattern in place. The buffer is
         * heap-owned and dl < pattern_len, so this is always in bounds and
         * never reads the pattern after it has been released. */
        e->pattern[dl] = '\0';
        e->pattern_len = dl;

        c2->endpoint++;
        c2->route_ptr = route_ptr;
        return c2;
    } else {
        printf("unexpected condition.");
        return NULL;
    }
}
2014-05-16 06:57:36 -04:00
/**
 * Refresh the has_slug flag of every edge of `n` from contains_slug() and
 * report whether any edge has a slug.
 *
 * All edges are always visited (no early exit), because the flag of each
 * edge is updated as a side effect.
 */
bool r3_node_has_slug_edges(node *n) {
    bool any_slug = FALSE;
    int idx = 0;
    while (idx < n->edge_len) {
        edge *cur = n->edges[idx];
        cur->has_slug = contains_slug(cur->pattern);
        if (cur->has_slug) {
            any_slug = TRUE;
        }
        idx++;
    }
    return any_slug;
}
2014-05-16 03:29:25 -04:00
2014-05-15 06:02:10 -04:00
2014-05-16 06:57:36 -04:00
/**
 * Print the subtree rooted at `n` to stdout.
 *
 * Nothing is printed for a node without edges. Otherwise the node's
 * combined pattern (if any) and endpoint counter are printed, followed by
 * one line per edge, indented by `level`, showing the edge pattern, its
 * compiled slug when the edge has one, and recursively the edge's child when
 * that child has an edge array.
 */
void r3_tree_dump(node * n, int level) {
    if ( !n->edge_len ) {
        return;
    }

    if ( n->combined_pattern ) {
        printf(" regexp:%s", n->combined_pattern);
    }
    printf(" endpoint:%d\n", n->endpoint);

    int idx = 0;
    while ( idx < n->edge_len ) {
        edge * cur = n->edges[idx];

        print_indent(level);
        printf("  |-\"%s\"", cur->pattern);

        if (cur->has_slug) {
            printf(" slug:");
            printf("%s", compile_slug(cur->pattern, cur->pattern_len) );
        }

        if ( cur->child && cur->child->edges ) {
            r3_tree_dump( cur->child, level + 1);
        }
        printf("\n");
        idx++;
    }
}
2014-05-16 06:03:52 -04:00
/*
2014-05-16 06:57:36 -04:00
char * r3_node_trace(node * n) {
2014-05-16 06:03:52 -04:00
}
*/