From e4eada3250f37a0885e8b63e206ee650e070a09f Mon Sep 17 00:00:00 2001 From: c9s Date: Thu, 5 Jun 2014 07:43:51 +0800 Subject: [PATCH] pcre match optimization --- bench_str.csv | 11 +++++++++++ src/node.c | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/bench_str.csv b/bench_str.csv index 1ba2728..71f97b2 100644 --- a/bench_str.csv +++ b/bench_str.csv @@ -577,3 +577,14 @@ 1401883468,11467197.28,4422749.94,72315.59,2289153.70 1401883480,11087024.10,4312175.00,55188.21,2296830.48 1401883491,10824939.64,4586578.13,71089.90,2213391.27 +1401925066,10935008.42,4088160.37,77672.30,2302477.76 +1401925276,10973173.75,4511027.12,49932.19,2329103.54 +1401925287,11654811.40,4536210.26,72315.59,2379382.00 +1401925299,11573243.70,3894464.26,59074.70,2276455.83 +1401925310,11944273.21,4666625.62,62601.55,2304322.33 +1401925322,11775622.43,3945455.94,43690.67,2149656.21 +1401925333,11539429.12,4630751.73,43240.25,2270121.49 +1401925344,11312437.08,4589657.39,62601.55,2329731.93 +1401925356,10802425.91,3920785.56,55924.05,2346753.13 +1401925370,8139871.96,2999119.49,61680.94,2067285.17 +1401925386,5893728.90,3762869.89,32263.88,1609723.24 diff --git a/src/node.c b/src/node.c index 17329c4..5ce3e14 100644 --- a/src/node.c +++ b/src/node.c @@ -361,28 +361,51 @@ node * r3_tree_matchl(const node * n, const char * path, int path_len, match_ent } + + restlen = path_len - ov[1]; // if it's fully matched to the end (rest string length) + + if (restlen == 0 ) { + // Check the substring to decide we should go deeper on which edge + for (i = 1; i < rc; i++) + { + substring_length = ov[2*i+1] - ov[2*i]; + + // if it's not matched for this edge, just skip them quickly + if (substring_length == 0) + continue; + + substring_start = path + ov[2*i]; + e = n->edges[i - 1]; + + if (entry && e->has_slug) { + // append captured token to entry + str_array_append(entry->vars , zstrndup(substring_start, substring_length)); + } + + // since restlen == 0 return the edge quickly. + return e->child && e->child->endpoint > 0 ? e->child : NULL; + } + } + + + // Check the substring to decide we should go deeper on which edge for (i = 1; i < rc; i++) { - substring_start = path + ov[2*i]; substring_length = ov[2*i+1] - ov[2*i]; - // info("%2d: %.*s\n", i, substring_length, substring_start); + // if it's not matched for this edge, just skip them quickly if ( substring_length == 0) { continue; } - restlen = path_len - ov[1]; // fully match to the end - // info("matched item => restlen:%d edges:%d i:%d\n", restlen, n->edge_len, i); - + substring_start = path + ov[2*i]; e = n->edges[i - 1]; if (entry && e->has_slug) { // append captured token to entry str_array_append(entry->vars , zstrndup(substring_start, substring_length)); } - if (restlen == 0 ) { - return e->child && e->child->endpoint > 0 ? e->child : NULL; - } + // get the length of orginal string: $0 return r3_tree_matchl( e->child, path + (ov[1] - ov[0]), restlen, entry); } @@ -419,8 +442,8 @@ route * r3_tree_match_route(const node *tree, match_entry * entry) { inline edge * r3_node_find_edge_str(const node * n, const char * str, int str_len) { char firstbyte = *str; - unsigned int i = n->edge_len; - while (i--) { + unsigned int i; + for (i = n->edge_len; i--; ) { if ( firstbyte == *(n->edges[i]->pattern) ) { info("matching '%s' with '%s'\n", str, node_edge_pattern(n,i) ); if ( strncmp( node_edge_pattern(n,i), str, node_edge_pattern_len(n,i) ) == 0 ) {