optimize: optimized pattern .*, which can be used for prefix matching.

This commit is contained in:
Yuansheng 2019-07-01 10:46:51 +08:00
parent 41685d402d
commit d516237aab
6 changed files with 98 additions and 15 deletions

View file

@ -189,7 +189,7 @@ translator, which translates simple patterns into small & fast scanners.
By using this method, r3 reduces the matching overhead of pcre library. By using this method, r3 reduces the matching overhead of pcre library.
Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+` or `[^-]+` Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+`, `[^-]+` or `.*`.
Slugs without specified regular expression will be compiled into the `[^/]+` pattern. therefore, it's optimized too. Slugs without specified regular expression will be compiled into the `[^/]+` pattern. therefore, it's optimized too.

View file

@ -208,7 +208,8 @@ int r3_pattern_to_opcode(const char * pattern, unsigned int len);
enum { NODE_COMPARE_STR, NODE_COMPARE_PCRE, NODE_COMPARE_OPCODE }; enum { NODE_COMPARE_STR, NODE_COMPARE_PCRE, NODE_COMPARE_OPCODE };
enum { OP_EXPECT_MORE_DIGITS = 1, OP_EXPECT_MORE_WORDS, OP_EXPECT_NOSLASH, OP_EXPECT_NODASH, OP_EXPECT_MORE_ALPHA }; enum { OP_EXPECT_MORE_DIGITS = 1, OP_EXPECT_MORE_WORDS, OP_EXPECT_NOSLASH,
OP_EXPECT_NODASH, OP_EXPECT_MORE_ALPHA, OP_EXPECT_NOLINEBREAKS};

View file

@ -223,7 +223,7 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
free(n->combined_pattern); free(n->combined_pattern);
n->combined_pattern = cpat; n->combined_pattern = cpat;
const char *pcre_error; const char *pcre_error = NULL;
int pcre_erroffset; int pcre_erroffset;
unsigned int option_bits = 0; unsigned int option_bits = 0;
@ -250,7 +250,7 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
pcre_free_study(n->pcre_extra); pcre_free_study(n->pcre_extra);
} }
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error); n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error);
if (!n->pcre_extra) { if (!n->pcre_extra && pcre_error) {
if (errstr) { if (errstr) {
int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern); int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern);
if (r) {}; if (r) {};
@ -312,8 +312,13 @@ R3Node * r3_tree_matchl(const R3Node * n, const char * path, unsigned int path_l
case OP_EXPECT_NODASH: case OP_EXPECT_NODASH:
while (*pp != '-' && pp < pp_end) pp++; while (*pp != '-' && pp < pp_end) pp++;
break; break;
case OP_EXPECT_NOLINEBREAKS:
while (*pp != '\n' && pp < pp_end) pp++;
break;
} }
// check match // check match
if (e->opcode != OP_EXPECT_NOLINEBREAKS) {
if ((pp - path) > 0) { if ((pp - path) > 0) {
if (entry) { if (entry) {
str_array_append(&entry->vars , path, pp - path); str_array_append(&entry->vars , path, pp - path);
@ -324,6 +329,15 @@ R3Node * r3_tree_matchl(const R3Node * n, const char * path, unsigned int path_l
} }
return r3_tree_matchl(e->child, pp, restlen, entry); return r3_tree_matchl(e->child, pp, restlen, entry);
} }
} else {
if (entry) {
str_array_append(&entry->vars , path, pp - path);
}
return e->child && e->child->endpoint ? e->child : NULL;
}
e++; e++;
} }
} }

View file

@ -43,6 +43,9 @@ int r3_pattern_to_opcode(const char * pattern, unsigned int len) {
if ( strncmp(pattern, "[^-]+", len) == 0 ) { if ( strncmp(pattern, "[^-]+", len) == 0 ) {
return OP_EXPECT_NODASH; return OP_EXPECT_NODASH;
} }
if ( strncmp(pattern, ".*", len) == 0 ) {
return OP_EXPECT_NOLINEBREAKS;
}
return 0; return 0;
} }

View file

@ -36,6 +36,9 @@ check_remote_addr_SOURCES = check_remote_addr.c
TESTS += check_http_scheme TESTS += check_http_scheme
check_http_scheme_SOURCES = check_http_scheme.c check_http_scheme_SOURCES = check_http_scheme.c
TESTS += check_routes2
check_routes2_SOURCES = check_routes2.c
if ENABLE_JSON if ENABLE_JSON
TESTS += check_json TESTS += check_json

62
tests/check_routes2.c Normal file
View file

@ -0,0 +1,62 @@
#include "config.h"
#include <stdio.h>
#include <check.h>
#include <stdlib.h>
#include <assert.h>
#include "r3.h"
#include "r3_slug.h"
/*
 * Checks the `.*` slug pattern on a single route, "/foo{:.*}".
 *
 * Expectations asserted below:
 *   - "/foo/bar", "/foo/" and a long multi-segment path under "/foo" all
 *     resolve to the registered route;
 *   - the bare "/foo" does not match.
 *
 * Each match entry is released as soon as its lookup has been checked, so
 * the test does not leak the entries it creates.
 */
START_TEST (test_routes2s)
{
    R3Node * n = r3_tree_create(10);
    match_entry * entry;
    R3Route *matched_route;

    /* The address of uri0 is stored as the route's data pointer and is
     * compared against matched_route->data to identify the matched route. */
    char * uri0 = "/foo{:.*}";
    r3_tree_insert_routel(n, 0, uri0, strlen(uri0), &uri0);

    char * err = NULL;
    r3_tree_compile(n, &err);
    ck_assert(err == NULL);

    /* One extra path segment after the prefix. */
    entry = match_entry_create("/foo/bar");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    /* The prefix alone is expected not to match. */
    entry = match_entry_create("/foo");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route == NULL);
    match_entry_free(entry);

    /* Prefix followed by a single slash. */
    entry = match_entry_create("/foo/");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    /* Many segments after the prefix. */
    entry = match_entry_create("/foo/bar/foo/mmasdfasdfasd/f/asdf/as/df");
    matched_route = r3_tree_match_route(n, entry);
    ck_assert(matched_route != NULL);
    ck_assert(matched_route->data == &uri0);
    match_entry_free(entry);

    r3_tree_free(n);
}
END_TEST
/*
 * Assembles the Check suite for this file: one test case named "testcase"
 * containing test_routes2s, attached to a suite named "r3 routes2 tests".
 * Returns the newly created suite.
 */
Suite* r3_suite (void) {
    Suite *s = suite_create("r3 routes2 tests");
    TCase *tc = tcase_create("testcase");

    tcase_add_test(tc, test_routes2s);
    suite_add_tcase(s, tc);

    return s;
}
/*
 * Test-program entry point: runs the suite built by r3_suite() and reports
 * the outcome through the process exit status.
 *
 * Returns EXIT_SUCCESS when no test failed and EXIT_FAILURE otherwise. The
 * raw failure count is deliberately not used as the exit status: exit codes
 * are truncated to 8 bits, and test harnesses may give special meaning to
 * particular non-zero values.
 */
int main (int argc, char *argv[]) {
    /* Command-line arguments are not used by this test program. */
    (void) argc;
    (void) argv;

    Suite *suite = r3_suite();
    SRunner *runner = srunner_create(suite);

    srunner_run_all(runner, CK_NORMAL);
    int number_failed = srunner_ntests_failed(runner);
    srunner_free(runner);

    return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}