Use PCRE2 instead of PCRE (#153)
PCRE is now at end of life and is no longer actively maintained. Lift the dependency to the next major version, i.e. PCRE2. Implementation notes: - Removed the pcre study option since: "The new API ... was simplified by abolishing the separate "study" optimizing function; in PCRE2, patterns are automatically optimized where possible." - If asprintf() fails, the content of the 'strp' variable is undefined. Let's check the return value and return NULL upon error. - Pattern and subject can straightforwardly be cast to PCRE2_SPTR since we only work with 8-bit code units.
This commit is contained in:
parent
9168f7e4d4
commit
c105117b40
13 changed files with 88 additions and 99 deletions
|
@ -10,7 +10,7 @@ apt-get install -qq \
|
|||
cmake \
|
||||
graphviz-dev \
|
||||
libjemalloc-dev \
|
||||
libpcre3-dev \
|
||||
libpcre2-dev \
|
||||
libtool \
|
||||
ninja-build \
|
||||
pkg-config
|
||||
|
|
|
@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
|
|||
set(CMAKE_C_STANDARD 99)
|
||||
|
||||
find_package(Check)
|
||||
find_package(PCRE REQUIRED)
|
||||
find_package(PCRE2 REQUIRED)
|
||||
|
||||
include(CheckSymbolExists)
|
||||
include(CheckIncludeFile)
|
||||
|
|
10
README.md
10
README.md
|
@ -25,7 +25,7 @@ Requirement
|
|||
|
||||
### Runtime Requirement
|
||||
|
||||
* pcre
|
||||
* pcre2
|
||||
* (optional) graphviz version 2.38.0 (20140413.2041)
|
||||
* (optional) libjson-c-dev
|
||||
|
||||
|
@ -187,13 +187,13 @@ Optimization
|
|||
Simple regular expressions are optimized through a regexp pattern to opcode
|
||||
translator, which translates simple patterns into small & fast scanners.
|
||||
|
||||
By using this method, r3 reduces the matching overhead of pcre library.
|
||||
By using this method, r3 reduces the matching overhead of the pcre2 library.
|
||||
|
||||
Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+`, `[^-]+` or `.*`.
|
||||
|
||||
Slugs without a specified regular expression will be compiled into the `[^/]+` pattern; therefore, they are optimized too.
|
||||
|
||||
Complex regular expressions will still use libpcre to match URL (partially).
|
||||
Complex regular expressions will still use libpcre2 to match URL (partially).
|
||||
|
||||
|
||||
Performance
|
||||
|
@ -356,7 +356,7 @@ if ( $error ) {
|
|||
Install
|
||||
----------------------
|
||||
|
||||
sudo apt-get install check libpcre3 libpcre3-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
|
||||
sudo apt-get install check libpcre2-8-0 libpcre2-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
|
||||
sudo apt-get install graphviz-dev graphviz # if you want graphviz
|
||||
./autogen.sh
|
||||
./configure && make
|
||||
|
@ -364,7 +364,7 @@ Install
|
|||
|
||||
We also support Debian-based distros now!
|
||||
|
||||
sudo apt-get install build-essential autoconf automake libpcre3-dev pkg-config debhelper libtool check
|
||||
sudo apt-get install build-essential autoconf automake libpcre2-dev pkg-config debhelper libtool check
|
||||
mv dist-debian debian
|
||||
dpkg-buildpackage -b -us -uc
|
||||
sudo gdebi ../libr3*.deb
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
# Copyright (C) 2007-2009 LuaDist.
|
||||
# Created by Peter Kapec <kapecp@gmail.com>
|
||||
# Redistribution and use of this file is allowed according to the terms of the MIT license.
|
||||
# For details see the COPYRIGHT file distributed with LuaDist.
|
||||
# Note:
|
||||
# Searching headers and libraries is very simple and is NOT as powerful as scripts
|
||||
# distributed with CMake, because LuaDist defines directories to search for.
|
||||
# Everyone is encouraged to contact the author with improvements. Maybe this file
|
||||
# becomes part of CMake distribution sometimes.
|
||||
|
||||
# - Find pcre
|
||||
# Find the native PCRE headers and libraries.
|
||||
#
|
||||
# PCRE_INCLUDE_DIRS - where to find pcre.h, etc.
|
||||
# PCRE_LIBRARIES - List of libraries when using pcre.
|
||||
# PCRE_FOUND - True if pcre found.
|
||||
|
||||
# Look for the header file.
|
||||
FIND_PATH(PCRE_INCLUDE_DIR NAMES pcre.h)
|
||||
|
||||
# Look for the library.
|
||||
FIND_LIBRARY(PCRE_LIBRARY NAMES pcre)
|
||||
|
||||
# Handle the QUIETLY and REQUIRED arguments and set PCRE_FOUND to TRUE if all listed variables are TRUE.
|
||||
INCLUDE(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE DEFAULT_MSG PCRE_LIBRARY PCRE_INCLUDE_DIR)
|
||||
|
||||
# Copy the results to the output variables.
|
||||
IF(PCRE_FOUND)
|
||||
SET(PCRE_LIBRARIES ${PCRE_LIBRARY})
|
||||
SET(PCRE_INCLUDE_DIRS ${PCRE_INCLUDE_DIR})
|
||||
ELSE(PCRE_FOUND)
|
||||
SET(PCRE_LIBRARIES)
|
||||
SET(PCRE_INCLUDE_DIRS)
|
||||
ENDIF(PCRE_FOUND)
|
||||
|
||||
MARK_AS_ADVANCED(PCRE_INCLUDE_DIRS PCRE_LIBRARIES)
|
37
cmake/Modules/FindPCRE2.cmake
Normal file
37
cmake/Modules/FindPCRE2.cmake
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Copyright (C) 2007-2009 LuaDist.
|
||||
# Created by Peter Kapec <kapecp@gmail.com>
|
||||
# Redistribution and use of this file is allowed according to the terms of the MIT license.
|
||||
# For details see the COPYRIGHT file distributed with LuaDist.
|
||||
# Note:
|
||||
# Searching headers and libraries is very simple and is NOT as powerful as scripts
|
||||
# distributed with CMake, because LuaDist defines directories to search for.
|
||||
# Everyone is encouraged to contact the author with improvements. Maybe this file
|
||||
# becomes part of CMake distribution sometimes.
|
||||
|
||||
# - Find pcre2
|
||||
# Find the native PCRE2 headers and libraries.
|
||||
#
|
||||
# PCRE2_INCLUDE_DIRS - where to find pcre2.h, etc.
|
||||
# PCRE2_LIBRARIES - List of libraries when using pcre2.
|
||||
# PCRE2_FOUND - True if pcre2 found.
|
||||
|
||||
# Look for the header file.
|
||||
FIND_PATH(PCRE2_INCLUDE_DIR NAMES pcre2.h)
|
||||
|
||||
# Look for the library.
|
||||
FIND_LIBRARY(PCRE2_LIBRARY NAMES pcre2-8)
|
||||
|
||||
# Handle the QUIETLY and REQUIRED arguments and set PCRE2_FOUND to TRUE if all listed variables are TRUE.
|
||||
INCLUDE(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_INCLUDE_DIR)
|
||||
|
||||
# Copy the results to the output variables.
|
||||
IF(PCRE2_FOUND)
|
||||
SET(PCRE2_LIBRARIES ${PCRE2_LIBRARY})
|
||||
SET(PCRE2_INCLUDE_DIRS ${PCRE2_INCLUDE_DIR})
|
||||
ELSE(PCRE2_FOUND)
|
||||
SET(PCRE2_LIBRARIES)
|
||||
SET(PCRE2_INCLUDE_DIRS)
|
||||
ENDIF(PCRE2_FOUND)
|
||||
|
||||
MARK_AS_ADVANCED(PCRE2_INCLUDE_DIRS PCRE2_LIBRARIES)
|
|
@ -73,7 +73,7 @@ AM_CONDITIONAL(USE_JEMALLOC, test "x$have_jemalloc" = "xyes")
|
|||
# AC_DEFINE(USE_JEMALLOC, test "x$found_jemalloc" = "xyes" , "use jemalloc")
|
||||
|
||||
|
||||
PKG_CHECK_MODULES(DEPS, [libpcre])
|
||||
PKG_CHECK_MODULES(DEPS, [libpcre2-8])
|
||||
AC_SUBST(DEPS_CFLAGS)
|
||||
AC_SUBST(DEPS_LIBS)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ Source: libr3
|
|||
Priority: optional
|
||||
Maintainer: Ronmi Ren <ronmi.ren@gmail.com>
|
||||
Build-Depends: debhelper (>= 8.0.0), automake, autotools-dev, autoconf,
|
||||
libtool, libpcre3-dev, pkg-config, check
|
||||
libtool, libpcre2-dev, pkg-config, check
|
||||
Standards-Version: 3.9.4
|
||||
Section: libs
|
||||
Homepage: https://github.com/c9s/r3
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <pcre.h>
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
|
||||
#if __STDC_VERSION__ <= 201710L
|
||||
#ifdef HAVE_STDBOOL_H
|
||||
|
@ -43,13 +44,12 @@ struct _node {
|
|||
R3_VECTOR(R3Edge) edges;
|
||||
R3_VECTOR(R3Route) routes;
|
||||
char * combined_pattern;
|
||||
pcre * pcre_pattern;
|
||||
pcre_extra * pcre_extra;
|
||||
pcre2_code * pcre_pattern;
|
||||
pcre2_match_data * match_data;
|
||||
|
||||
// edges are mostly less than 255
|
||||
unsigned int compare_type; // compare_type: pcre, opcode, string
|
||||
unsigned int endpoint; // endpoint, should be zero for non-endpoint nodes
|
||||
unsigned int ov_cnt; // capture vector array size for pcre
|
||||
|
||||
// the pointer of R3Route data
|
||||
void * data;
|
||||
|
|
2
r3.pc.in
2
r3.pc.in
|
@ -6,6 +6,6 @@ libdir=@libdir@
|
|||
Name: r3
|
||||
Description: High-performance URL router library
|
||||
Version: @PACKAGE_VERSION@
|
||||
Requires: libpcre
|
||||
Requires: libpcre2-8
|
||||
Libs: -L${libdir} -lr3
|
||||
CFlags: -I${includedir}
|
||||
|
|
|
@ -19,7 +19,7 @@ target_include_directories(r3
|
|||
|
||||
target_link_libraries(r3
|
||||
PUBLIC
|
||||
${PCRE_LIBRARIES})
|
||||
${PCRE2_LIBRARIES})
|
||||
|
||||
install(
|
||||
TARGETS r3
|
||||
|
|
|
@ -13,8 +13,6 @@
|
|||
// Jemalloc memory management
|
||||
// #include <jemalloc/jemalloc.h>
|
||||
|
||||
// PCRE
|
||||
#include <pcre.h>
|
||||
#include "r3.h"
|
||||
#include "r3_slug.h"
|
||||
#include "slug.h"
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <pcre.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "r3.h"
|
||||
|
|
80
src/node.c
80
src/node.c
|
@ -7,9 +7,6 @@
|
|||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
// PCRE
|
||||
#include <pcre.h>
|
||||
|
||||
#include "r3.h"
|
||||
#include "r3_slug.h"
|
||||
#include "slug.h"
|
||||
|
@ -75,13 +72,11 @@ void r3_tree_free(R3Node * tree) {
|
|||
}
|
||||
free(tree->routes.entries);
|
||||
if (tree->pcre_pattern) {
|
||||
pcre_free(tree->pcre_pattern);
|
||||
pcre2_code_free(tree->pcre_pattern);
|
||||
}
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
if (tree->pcre_extra) {
|
||||
pcre_free_study(tree->pcre_extra);
|
||||
if (tree->match_data) {
|
||||
pcre2_match_data_free(tree->match_data);
|
||||
}
|
||||
#endif
|
||||
free(tree->combined_pattern);
|
||||
free(tree);
|
||||
tree = NULL;
|
||||
|
@ -223,41 +218,44 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
|
|||
free(n->combined_pattern);
|
||||
n->combined_pattern = cpat;
|
||||
|
||||
const char *pcre_error = NULL;
|
||||
int pcre_erroffset = 0;
|
||||
int pcre_errorcode = 0;
|
||||
PCRE2_SIZE pcre_erroffset = 0;
|
||||
unsigned int option_bits = 0;
|
||||
|
||||
n->ov_cnt = (1 + n->edges.size) * 3;
|
||||
|
||||
if (n->pcre_pattern) {
|
||||
pcre_free(n->pcre_pattern);
|
||||
pcre2_code_free(n->pcre_pattern);
|
||||
}
|
||||
n->pcre_pattern = pcre_compile(
|
||||
n->combined_pattern, /* the pattern */
|
||||
n->pcre_pattern = pcre2_compile(
|
||||
(PCRE2_SPTR)n->combined_pattern, /* the pattern, 8-bit code units */
|
||||
PCRE2_ZERO_TERMINATED,
|
||||
option_bits, /* default options */
|
||||
&pcre_error, /* for error message */
|
||||
&pcre_errorcode, /* for error code */
|
||||
&pcre_erroffset, /* for error offset */
|
||||
NULL); /* use default character tables */
|
||||
NULL); /* compile context */
|
||||
if (n->pcre_pattern == NULL) {
|
||||
if (errstr) {
|
||||
int r = asprintf(errstr, "PCRE compilation failed at offset %d: %s, pattern: %s", pcre_erroffset, pcre_error, n->combined_pattern);
|
||||
if (r) {};
|
||||
PCRE2_UCHAR buf[128];
|
||||
pcre2_get_error_message(pcre_errorcode, buf, sizeof(buf));
|
||||
int r = asprintf(errstr, "PCRE compilation failed at offset %ld: %s, pattern: %s", pcre_erroffset, buf, n->combined_pattern);
|
||||
if (r < 0) {
|
||||
*errstr = NULL; /* the content of errstr is undefined when asprintf() fails */
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
if (n->pcre_extra) {
|
||||
pcre_free_study(n->pcre_extra);
|
||||
if (n->match_data) {
|
||||
pcre2_match_data_free(n->match_data);
|
||||
}
|
||||
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error);
|
||||
if (!n->pcre_extra && pcre_error) {
|
||||
n->match_data = pcre2_match_data_create_from_pattern(n->pcre_pattern, NULL);
|
||||
if (n->match_data == NULL) {
|
||||
if (errstr) {
|
||||
int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern);
|
||||
if (r) {};
|
||||
int r = asprintf(errstr, "Failed to allocate match data block");
|
||||
if (r < 0) {
|
||||
*errstr = NULL; /* the content of errstr is undefined when asprintf() fails */
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -339,20 +337,18 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
|
|||
info("COMPARE PCRE_PATTERN\n");
|
||||
const char *substring_start = 0;
|
||||
int substring_length = 0;
|
||||
int ov[ n->ov_cnt ];
|
||||
int rc;
|
||||
|
||||
info("pcre matching %s on [%s]\n", n->combined_pattern, path);
|
||||
|
||||
rc = pcre_exec(
|
||||
rc = pcre2_match(
|
||||
n->pcre_pattern, /* the compiled pattern */
|
||||
n->pcre_extra,
|
||||
path, /* the subject string */
|
||||
(PCRE2_SPTR)path,/* the subject string, 8-bit code units */
|
||||
path_len, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
ov, /* output vector for substring information */
|
||||
n->ov_cnt); /* number of elements in the output vector */
|
||||
n->match_data,/* match data results */
|
||||
NULL); /* match context */
|
||||
|
||||
// does not match all edges, return NULL;
|
||||
if (rc < 0) {
|
||||
|
@ -360,7 +356,7 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
|
|||
printf("pcre rc: %d\n", rc );
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE_ERROR_NOMATCH:
|
||||
case PCRE2_ERROR_NOMATCH:
|
||||
printf("pcre: no match '%s' on pattern '%s'\n", path, n->combined_pattern);
|
||||
break;
|
||||
|
||||
|
@ -373,23 +369,22 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_SIZE *ov = pcre2_get_ovector_pointer(n->match_data);
|
||||
|
||||
restlen = path_len - ov[1]; // if it's fully matched to the end (rest string length)
|
||||
int *inv = ov + 2;
|
||||
|
||||
if (!restlen) {
|
||||
// Check the substring to decide we should go deeper on which edge
|
||||
for (i = 1; i < rc; i++)
|
||||
{
|
||||
substring_length = *(inv+1) - *inv;
|
||||
substring_length = ov[2*i+1] - ov[2*i];
|
||||
|
||||
// if it's not matched for this edge, just skip them quickly
|
||||
if (!is_end && !substring_length) {
|
||||
inv += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
substring_start = path + *inv;
|
||||
substring_start = path + ov[2*i];
|
||||
e = n->edges.entries + i - 1;
|
||||
|
||||
if (entry && e->has_slug) {
|
||||
|
@ -404,18 +399,16 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
|
|||
|
||||
|
||||
// Check the substring to decide we should go deeper on which edge
|
||||
inv = ov + 2;
|
||||
for (i = 1; i < rc; i++)
|
||||
{
|
||||
substring_length = *(inv+1) - *inv;
|
||||
substring_length = ov[2*i+1] - ov[2*i];
|
||||
|
||||
// if it's not matched for this edge, just skip them quickly
|
||||
if (!is_end && !substring_length) {
|
||||
inv += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
substring_start = path + *inv;
|
||||
substring_start = path + ov[2*i];
|
||||
e = n->edges.entries + i - 1;
|
||||
|
||||
if (entry && e->has_slug) {
|
||||
|
@ -520,7 +513,6 @@ inline R3Edge * r3_node_find_edge_str(const R3Node * n, const char * str, int st
|
|||
// n->endpoint = 0;
|
||||
// n->combined_pattern = NULL;
|
||||
// n->pcre_pattern = NULL;
|
||||
// n->pcre_extra = NULL;
|
||||
// n->data = NULL;
|
||||
// return n;
|
||||
// }
|
||||
|
|
Loading…
Reference in a new issue