Merge pull request #126 from iresty/optimize-wildcard

optimize: optimized pattern `.*`, which can be used for prefix matching.
This commit is contained in:
Yo-An Lin 2019-07-21 19:30:37 +08:00 committed by GitHub
commit 3dac164cec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 15 deletions

View file

@ -125,7 +125,7 @@ R3Node * matched_node = r3_tree_match_entry(n, entry);
**Release Memory**
To release the memory, you may call `r3_tree_free(R3Node *tree)` to release the whole tree structure,
To release the memory, you may call `r3_tree_free(R3Node *tree)` to release the whole tree structure,
`node*`, `edge*`, `route*` objects that were inserted into the tree will be freed.
@ -189,7 +189,7 @@ translator, which translates simple patterns into small & fast scanners.
By using this method, r3 reduces the matching overhead of pcre library.
Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+` or `[^-]+`
Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+`, `[^-]+` or `.*`.
Slugs without a specified regular expression are compiled into the `[^/]+` pattern; therefore, they are optimized too.
@ -337,10 +337,10 @@ Use case in PHP
```php
// Here is the paths data structure
$paths = [
'/blog/post/{id}' => [ 'controller' => 'PostController' , 'action' => 'item' , 'method' => 'GET' ] ,
'/blog/post' => [ 'controller' => 'PostController' , 'action' => 'list' , 'method' => 'GET' ] ,
'/blog/post' => [ 'controller' => 'PostController' , 'action' => 'create' , 'method' => 'POST' ] ,
'/blog' => [ 'controller' => 'BlogController' , 'action' => 'list' , 'method' => 'GET' ] ,
'/blog/post/{id}' => [ 'controller' => 'PostController' , 'action' => 'item' , 'method' => 'GET' ] ,
'/blog/post' => [ 'controller' => 'PostController' , 'action' => 'list' , 'method' => 'GET' ] ,
'/blog/post' => [ 'controller' => 'PostController' , 'action' => 'create' , 'method' => 'POST' ] ,
'/blog' => [ 'controller' => 'BlogController' , 'action' => 'list' , 'method' => 'GET' ] ,
];
$rs = r3_compile($paths, 'persisten-table-id');
$ret = r3_dispatch($rs, '/blog/post/3' );

View file

@ -208,7 +208,8 @@ int r3_pattern_to_opcode(const char * pattern, unsigned int len);
enum { NODE_COMPARE_STR, NODE_COMPARE_PCRE, NODE_COMPARE_OPCODE };
enum { OP_EXPECT_MORE_DIGITS = 1, OP_EXPECT_MORE_WORDS, OP_EXPECT_NOSLASH, OP_EXPECT_NODASH, OP_EXPECT_MORE_ALPHA };
enum { OP_EXPECT_MORE_DIGITS = 1, OP_EXPECT_MORE_WORDS, OP_EXPECT_NOSLASH,
OP_EXPECT_NODASH, OP_EXPECT_MORE_ALPHA, OP_GREEDY_ANY};

View file

@ -223,7 +223,7 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
free(n->combined_pattern);
n->combined_pattern = cpat;
const char *pcre_error;
const char *pcre_error = NULL;
int pcre_erroffset;
unsigned int option_bits = 0;
@ -250,7 +250,7 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
pcre_free_study(n->pcre_extra);
}
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error);
if (!n->pcre_extra) {
if (!n->pcre_extra && pcre_error) {
if (errstr) {
int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern);
if (r) {};
@ -312,18 +312,32 @@ R3Node * r3_tree_matchl(const R3Node * n, const char * path, unsigned int path_l
case OP_EXPECT_NODASH:
while (*pp != '-' && pp < pp_end) pp++;
break;
case OP_GREEDY_ANY:
while (*pp != '\n' && pp < pp_end) pp++;
break;
}
// check match
if ((pp - path) > 0) {
if (e->opcode != OP_GREEDY_ANY) {
if ((pp - path) > 0) {
if (entry) {
str_array_append(&entry->vars , path, pp - path);
}
restlen = pp_end - pp;
if (!restlen) {
return e->child && e->child->endpoint ? e->child : NULL;
}
return r3_tree_matchl(e->child, pp, restlen, entry);
}
} else {
if (entry) {
str_array_append(&entry->vars , path, pp - path);
}
restlen = pp_end - pp;
if (!restlen) {
return e->child && e->child->endpoint ? e->child : NULL;
}
return r3_tree_matchl(e->child, pp, restlen, entry);
return e->child && e->child->endpoint ? e->child : NULL;
}
e++;
}
}

View file

@ -43,6 +43,9 @@ int r3_pattern_to_opcode(const char * pattern, unsigned int len) {
if ( strncmp(pattern, "[^-]+", len) == 0 ) {
return OP_EXPECT_NODASH;
}
if ( strncmp(pattern, ".*", len) == 0 ) {
return OP_GREEDY_ANY;
}
return 0;
}

View file

@ -21,6 +21,7 @@ add_r3_test(check_str_array check_str_array.c)
add_r3_test(check_host check_host.c)
add_r3_test(check_http_scheme check_http_scheme.c)
add_r3_test(check_remote_addr check_remote_addr.c)
add_r3_test(check_routes2 check_routes2.c)
add_executable(bench bench.c)

View file

@ -36,6 +36,9 @@ check_remote_addr_SOURCES = check_remote_addr.c
TESTS += check_http_scheme
check_http_scheme_SOURCES = check_http_scheme.c
TESTS += check_routes2
check_routes2_SOURCES = check_routes2.c
if ENABLE_JSON
TESTS += check_json

65
tests/check_routes2.c Normal file
View file

@ -0,0 +1,65 @@
#include "config.h"
#include <stdio.h>
#include <check.h>
#include <stdlib.h>
#include <assert.h>
#include "r3.h"
#include "r3_slug.h"
/*
 * greedy_pattern: a route whose slug uses the `.*` pattern must act as a
 * prefix match. One route, "/foo{:.*}", is inserted with &uri0 as its data
 * pointer; every path below that starts with "/foo/" has to resolve to that
 * same route.
 *
 * Each match_entry is released right after its assertions so the test does
 * not leak one entry per lookup.
 */
START_TEST (greedy_pattern)
{
    R3Node * n = r3_tree_create(10);
    match_entry * entry;
    R3Route *matched_route;

    char * uri0 = "/foo{:.*}";
    r3_tree_insert_routel(n, 0, uri0, strlen(uri0), &uri0);

    char * err = NULL;
    r3_tree_compile(n, &err);
    ck_assert(err == NULL);

    entry = match_entry_create("/foo/bar");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    // fixme: should match
    // entry = match_entry_create("/foo");
    // matched_route = r3_tree_match_route(n, entry);
    // ck_assert(matched_route != NULL);
    // ck_assert(matched_route->data == &uri0);

    // A lone trailing slash after the prefix is still covered by `.*`.
    entry = match_entry_create("/foo/");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    // The greedy slug must swallow any number of further '/' segments.
    entry = match_entry_create("/foo/bar/foo/mmasdfasdfasd/f/asdf/as/df");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    r3_tree_free(n);
}
END_TEST
Suite* r3_suite (void) {
Suite *suite = suite_create("r3 routes2 tests");
TCase *tcase = tcase_create("testcase");
tcase_add_test(tcase, greedy_pattern);
suite_add_tcase(suite, tcase);
return suite;
}
/*
 * Test-runner entry point: builds the suite from r3_suite(), runs it with
 * CK_NORMAL verbosity and reports the outcome through the exit status.
 *
 * Returns EXIT_SUCCESS when no test failed, EXIT_FAILURE otherwise.
 */
int main (int argc, char *argv[]) {
    // Command-line arguments are not used by this runner.
    (void) argc;
    (void) argv;

    Suite *suite = r3_suite();
    SRunner *runner = srunner_create(suite);

    srunner_run_all(runner, CK_NORMAL);
    int number_failed = srunner_ntests_failed(runner);
    srunner_free(runner);

    // Do not return the raw failure count: an exit status keeps only its low
    // 8 bits (256 failures would read as success), and automake's test
    // harness gives 77 and 99 special meanings (skip / hard error).
    return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}