[perl-re-engine-RE2: 6/8] Remove the re2 folder separately from the patch
Mathieu Bridon
bochecha at fedoraproject.org
Thu Mar 7 06:05:34 UTC 2013
commit 4711d7036a0d9e403f66b49e6e66646d2fcb3176
Author: Mathieu Bridon <bochecha at fedoraproject.org>
Date: Wed Mar 6 12:11:29 2013 +0800
Remove the re2 folder separately from the patch
The previous patch was looking awfully complex, when most of what it did
was a simple `rm -fr re2`.
Doing the removal in the spec file instead makes the patch much more readable,
so it's easier to focus on the changes necessary to build against the system lib.
perl-re-engine-RE2.spec | 8 +-
re-engine-RE2-0.11-Unbundle-re2.patch |39372 +--------------------------------
2 files changed, 13 insertions(+), 39367 deletions(-)
---
diff --git a/perl-re-engine-RE2.spec b/perl-re-engine-RE2.spec
index 8b6b344..7f77864 100644
--- a/perl-re-engine-RE2.spec
+++ b/perl-re-engine-RE2.spec
@@ -1,7 +1,7 @@
Name: perl-re-engine-RE2
Summary: RE2 regex engine
Version: 0.11
-Release: 2%{?dist}
+Release: 3%{?dist}
License: GPL+ or Artistic
URL: http://search.cpan.org/dist/re-engine-RE2/
Source0: http://www.cpan.org/authors/id/D/DG/DGL/re-engine-RE2-%{version}.tar.gz
@@ -30,6 +30,9 @@ This module replaces perl's regex engine in a given lexical scope with RE2.
# Remove incorrect executable bits
chmod -x lib/re/engine/RE2.pm
+# Just to be sure we don't build against the bundled version
+rm -fr re2
+
%patch0 -p1
@@ -59,6 +62,9 @@ make test
%changelog
+* Wed Mar 06 2013 Mathieu Bridon <bochecha at fedoraproject.org> - 0.11-3
+- Petr is right, the patch does look much more readable this way.
+
* Thu Feb 21 2013 Mathieu Bridon <bochecha at fedoraproject.org> - 0.11-2
- Remove incorrect executable bits.
- Add a missing build requirement.
diff --git a/re-engine-RE2-0.11-Unbundle-re2.patch b/re-engine-RE2-0.11-Unbundle-re2.patch
index 2b2d54b..af24838 100644
--- a/re-engine-RE2-0.11-Unbundle-re2.patch
+++ b/re-engine-RE2-0.11-Unbundle-re2.patch
@@ -1,235 +1,19 @@
-From 247b404dd1d6cf652fb1df6f78a12c002c3b9902 Mon Sep 17 00:00:00 2001
+From 6b2e39f496d852f68460c41dc32f2309d94b1b84 Mon Sep 17 00:00:00 2001
From: Mathieu Bridon <bochecha at fedoraproject.org>
Date: Wed, 20 Feb 2013 15:15:41 +0800
Subject: [PATCH] Unbundle re2
We want to build against the system version of the library.
---
- MANIFEST | 108 -
- Makefile.PL | 11 +-
- re2/.hgignore | 7 -
- re2/AUTHORS | 12 -
- re2/CONTRIBUTORS | 33 -
- re2/LICENSE | 27 -
- re2/Makefile | 287 --
- re2/README | 19 -
- re2/libre2.symbols | 15 -
- re2/libre2.symbols.darwin | 11 -
- re2/re2/Makefile | 1 -
- re2/re2/bitstate.cc | 378 ---
- re2/re2/compile.cc | 1138 -------
- re2/re2/dfa.cc | 2086 -------------
- re2/re2/filtered_re2.cc | 100 -
- re2/re2/filtered_re2.h | 101 -
- re2/re2/make_perl_groups.pl | 110 -
- re2/re2/make_unicode_casefold.py | 146 -
- re2/re2/make_unicode_groups.py | 111 -
- re2/re2/mimics_pcre.cc | 185 --
- re2/re2/nfa.cc | 709 -----
- re2/re2/onepass.cc | 614 ----
- re2/re2/parse.cc | 2202 --------------
- re2/re2/perl_groups.cc | 119 -
- re2/re2/prefilter.cc | 671 -----
- re2/re2/prefilter.h | 105 -
- re2/re2/prefilter_tree.cc | 398 ---
- re2/re2/prefilter_tree.h | 130 -
- re2/re2/prog.cc | 341 ---
- re2/re2/prog.h | 376 ---
- re2/re2/re2.cc | 1182 --------
- re2/re2/re2.h | 837 -----
- re2/re2/regexp.cc | 920 ------
- re2/re2/regexp.h | 632 ----
- re2/re2/set.cc | 113 -
- re2/re2/set.h | 55 -
- re2/re2/simplify.cc | 393 ---
- re2/re2/stringpiece.h | 182 --
- re2/re2/testing/backtrack.cc | 254 --
- re2/re2/testing/charclass_test.cc | 223 --
- re2/re2/testing/compile_test.cc | 171 --
- re2/re2/testing/dfa_test.cc | 343 ---
- re2/re2/testing/dump.cc | 164 -
- re2/re2/testing/exhaustive1_test.cc | 42 -
- re2/re2/testing/exhaustive2_test.cc | 70 -
- re2/re2/testing/exhaustive3_test.cc | 94 -
- re2/re2/testing/exhaustive_test.cc | 38 -
- re2/re2/testing/exhaustive_tester.cc | 188 --
- re2/re2/testing/exhaustive_tester.h | 85 -
- re2/re2/testing/filtered_re2_test.cc | 258 --
- re2/re2/testing/mimics_pcre_test.cc | 76 -
- re2/re2/testing/null_walker.cc | 44 -
- re2/re2/testing/parse_test.cc | 376 ---
- re2/re2/testing/possible_match_test.cc | 240 --
- re2/re2/testing/random_test.cc | 95 -
- re2/re2/testing/re2_arg_test.cc | 132 -
- re2/re2/testing/re2_test.cc | 1363 ---------
- re2/re2/testing/regexp_benchmark.cc | 1461 ---------
- re2/re2/testing/regexp_generator.cc | 264 --
- re2/re2/testing/regexp_generator.h | 70 -
- re2/re2/testing/regexp_test.cc | 81 -
- re2/re2/testing/required_prefix_test.cc | 67 -
- re2/re2/testing/search_test.cc | 325 --
- re2/re2/testing/set_test.cc | 102 -
- re2/re2/testing/simplify_test.cc | 167 -
- re2/re2/testing/string_generator.cc | 113 -
- re2/re2/testing/string_generator.h | 58 -
- re2/re2/testing/string_generator_test.cc | 109 -
- re2/re2/testing/tester.cc | 640 ----
- re2/re2/testing/tester.h | 121 -
- re2/re2/testing/unicode_test.py | 207 --
- re2/re2/tostring.cc | 341 ---
- re2/re2/unicode.py | 297 --
- re2/re2/unicode_casefold.cc | 469 ---
- re2/re2/unicode_casefold.h | 75 -
- re2/re2/unicode_groups.cc | 4851 ------------------------------
- re2/re2/unicode_groups.h | 64 -
- re2/re2/variadic_function.h | 346 ---
- re2/re2/walker-inl.h | 244 --
- re2/runtests | 21 -
- re2/testinstall.cc | 13 -
- re2/util/arena.cc | 168 -
- re2/util/arena.h | 103 -
- re2/util/atomicops.h | 79 -
- re2/util/benchmark.cc | 153 -
- re2/util/benchmark.h | 41 -
- re2/util/flags.h | 27 -
- re2/util/hash.cc | 231 --
- re2/util/logging.h | 78 -
- re2/util/mutex.h | 190 --
- re2/util/pcre.cc | 961 ------
- re2/util/pcre.h | 679 -----
- re2/util/random.cc | 34 -
- re2/util/random.h | 29 -
- re2/util/rune.cc | 258 --
- re2/util/sparse_array.h | 451 ---
- re2/util/sparse_array_test.cc | 150 -
- re2/util/sparse_set.h | 177 --
- re2/util/stringpiece.cc | 87 -
- re2/util/stringprintf.cc | 78 -
- re2/util/strutil.cc | 97 -
- re2/util/test.cc | 39 -
- re2/util/test.h | 57 -
- re2/util/thread.cc | 44 -
- re2/util/thread.h | 26 -
- re2/util/utf.h | 43 -
- re2/util/util.h | 127 -
- re2/util/valgrind.cc | 24 -
- re2/util/valgrind.h | 4517 ----------------------------
- t/00.re2-tests.t | 35 -
- 110 files changed, 2 insertions(+), 38608 deletions(-)
- delete mode 100644 re2/.hgignore
- delete mode 100644 re2/AUTHORS
- delete mode 100644 re2/CONTRIBUTORS
- delete mode 100644 re2/LICENSE
- delete mode 100644 re2/Makefile
- delete mode 100644 re2/README
- delete mode 100644 re2/libre2.symbols
- delete mode 100644 re2/libre2.symbols.darwin
- delete mode 100644 re2/re2/Makefile
- delete mode 100644 re2/re2/bitstate.cc
- delete mode 100644 re2/re2/compile.cc
- delete mode 100644 re2/re2/dfa.cc
- delete mode 100644 re2/re2/filtered_re2.cc
- delete mode 100644 re2/re2/filtered_re2.h
- delete mode 100755 re2/re2/make_perl_groups.pl
- delete mode 100755 re2/re2/make_unicode_casefold.py
- delete mode 100755 re2/re2/make_unicode_groups.py
- delete mode 100644 re2/re2/mimics_pcre.cc
- delete mode 100644 re2/re2/nfa.cc
- delete mode 100644 re2/re2/onepass.cc
- delete mode 100644 re2/re2/parse.cc
- delete mode 100644 re2/re2/perl_groups.cc
- delete mode 100644 re2/re2/prefilter.cc
- delete mode 100644 re2/re2/prefilter.h
- delete mode 100644 re2/re2/prefilter_tree.cc
- delete mode 100644 re2/re2/prefilter_tree.h
- delete mode 100644 re2/re2/prog.cc
- delete mode 100644 re2/re2/prog.h
- delete mode 100644 re2/re2/re2.cc
- delete mode 100644 re2/re2/re2.h
- delete mode 100644 re2/re2/regexp.cc
- delete mode 100644 re2/re2/regexp.h
- delete mode 100644 re2/re2/set.cc
- delete mode 100644 re2/re2/set.h
- delete mode 100644 re2/re2/simplify.cc
- delete mode 100644 re2/re2/stringpiece.h
- delete mode 100644 re2/re2/testing/backtrack.cc
- delete mode 100644 re2/re2/testing/charclass_test.cc
- delete mode 100644 re2/re2/testing/compile_test.cc
- delete mode 100644 re2/re2/testing/dfa_test.cc
- delete mode 100644 re2/re2/testing/dump.cc
- delete mode 100644 re2/re2/testing/exhaustive1_test.cc
- delete mode 100644 re2/re2/testing/exhaustive2_test.cc
- delete mode 100644 re2/re2/testing/exhaustive3_test.cc
- delete mode 100644 re2/re2/testing/exhaustive_test.cc
- delete mode 100644 re2/re2/testing/exhaustive_tester.cc
- delete mode 100644 re2/re2/testing/exhaustive_tester.h
- delete mode 100644 re2/re2/testing/filtered_re2_test.cc
- delete mode 100644 re2/re2/testing/mimics_pcre_test.cc
- delete mode 100644 re2/re2/testing/null_walker.cc
- delete mode 100644 re2/re2/testing/parse_test.cc
- delete mode 100644 re2/re2/testing/possible_match_test.cc
- delete mode 100644 re2/re2/testing/random_test.cc
- delete mode 100644 re2/re2/testing/re2_arg_test.cc
- delete mode 100644 re2/re2/testing/re2_test.cc
- delete mode 100644 re2/re2/testing/regexp_benchmark.cc
- delete mode 100644 re2/re2/testing/regexp_generator.cc
- delete mode 100644 re2/re2/testing/regexp_generator.h
- delete mode 100644 re2/re2/testing/regexp_test.cc
- delete mode 100644 re2/re2/testing/required_prefix_test.cc
- delete mode 100644 re2/re2/testing/search_test.cc
- delete mode 100644 re2/re2/testing/set_test.cc
- delete mode 100644 re2/re2/testing/simplify_test.cc
- delete mode 100644 re2/re2/testing/string_generator.cc
- delete mode 100644 re2/re2/testing/string_generator.h
- delete mode 100644 re2/re2/testing/string_generator_test.cc
- delete mode 100644 re2/re2/testing/tester.cc
- delete mode 100644 re2/re2/testing/tester.h
- delete mode 100755 re2/re2/testing/unicode_test.py
- delete mode 100644 re2/re2/tostring.cc
- delete mode 100755 re2/re2/unicode.py
- delete mode 100644 re2/re2/unicode_casefold.cc
- delete mode 100644 re2/re2/unicode_casefold.h
- delete mode 100644 re2/re2/unicode_groups.cc
- delete mode 100644 re2/re2/unicode_groups.h
- delete mode 100644 re2/re2/variadic_function.h
- delete mode 100644 re2/re2/walker-inl.h
- delete mode 100755 re2/runtests
- delete mode 100644 re2/testinstall.cc
- delete mode 100644 re2/util/arena.cc
- delete mode 100644 re2/util/arena.h
- delete mode 100644 re2/util/atomicops.h
- delete mode 100644 re2/util/benchmark.cc
- delete mode 100644 re2/util/benchmark.h
- delete mode 100644 re2/util/flags.h
- delete mode 100644 re2/util/hash.cc
- delete mode 100644 re2/util/logging.h
- delete mode 100644 re2/util/mutex.h
- delete mode 100644 re2/util/pcre.cc
- delete mode 100644 re2/util/pcre.h
- delete mode 100644 re2/util/random.cc
- delete mode 100644 re2/util/random.h
- delete mode 100644 re2/util/rune.cc
- delete mode 100644 re2/util/sparse_array.h
- delete mode 100644 re2/util/sparse_array_test.cc
- delete mode 100644 re2/util/sparse_set.h
- delete mode 100644 re2/util/stringpiece.cc
- delete mode 100644 re2/util/stringprintf.cc
- delete mode 100644 re2/util/strutil.cc
- delete mode 100644 re2/util/test.cc
- delete mode 100644 re2/util/test.h
- delete mode 100644 re2/util/thread.cc
- delete mode 100644 re2/util/thread.h
- delete mode 100644 re2/util/utf.h
- delete mode 100644 re2/util/util.h
- delete mode 100644 re2/util/valgrind.cc
- delete mode 100644 re2/util/valgrind.h
- delete mode 100644 t/00.re2-tests.t
+ MANIFEST | 107 -----------------------------------------------------------
+ Makefile.PL | 11 +-----
+ 2 files changed, 2 insertions(+), 116 deletions(-)
diff --git a/MANIFEST b/MANIFEST
-index 8eac875..21ed033 100644
+index 8eac875..0b33546 100644
--- a/MANIFEST
+++ b/MANIFEST
-@@ -7,118 +7,10 @@ MANIFEST This list of files
+@@ -7,113 +7,6 @@ MANIFEST This list of files
MANIFEST.SKIP
ppport.h
RE2.xs
@@ -343,11 +127,6 @@ index 8eac875..21ed033 100644
re2_xs.cc
re2_xs.h
README
- t/00.compile.t
--t/00.re2-tests.t
- t/01.basic.t
- t/02.chars.t
- t/03.modifiers.t
diff --git a/Makefile.PL b/Makefile.PL
index 10aaee3..cad45f4 100644
--- a/Makefile.PL
@@ -383,39145 +162,6 @@ index 10aaee3..cad45f4 100644
-
MAKE_FRAG
}
-diff --git a/re2/.hgignore b/re2/.hgignore
-deleted file mode 100644
-index bfa7eb7..0000000
---- a/re2/.hgignore
-+++ /dev/null
-@@ -1,7 +0,0 @@
--syntax:glob
--*.pyc
--*.orig
--core
--
--syntax:regexp
--^obj/
-diff --git a/re2/AUTHORS b/re2/AUTHORS
-deleted file mode 100644
-index 3c0f928..0000000
---- a/re2/AUTHORS
-+++ /dev/null
-@@ -1,12 +0,0 @@
--# This is the official list of RE2 authors for copyright purposes.
--# This file is distinct from the CONTRIBUTORS files.
--# See the latter for an explanation.
--
--# Names should be added to this file as
--# Name or Organization <email address>
--# The email address is not required for organizations.
--
--# Please keep the list sorted.
--
--Google Inc.
--Stefano Rivera <stefano.rivera at gmail.com>
-diff --git a/re2/CONTRIBUTORS b/re2/CONTRIBUTORS
-deleted file mode 100644
-index 981ce02..0000000
---- a/re2/CONTRIBUTORS
-+++ /dev/null
-@@ -1,33 +0,0 @@
--# This is the official list of people who can contribute
--# (and typically have contributed) code to the RE2 repository.
--# The AUTHORS file lists the copyright holders; this file
--# lists people. For example, Google employees are listed here
--# but not in AUTHORS, because Google holds the copyright.
--#
--# The submission process automatically checks to make sure
--# that people submitting code are listed in this file (by email address).
--#
--# Names should be added to this file only after verifying that
--# the individual or the individual's organization has agreed to
--# the appropriate Contributor License Agreement, found here:
--#
--# http://code.google.com/legal/individual-cla-v1.0.html
--# http://code.google.com/legal/corporate-cla-v1.0.html
--#
--# The agreement for individuals can be filled out on the web.
--#
--# When adding J Random Contributor's name to this file,
--# either J's name or J's organization's name should be
--# added to the AUTHORS file, depending on whether the
--# individual or corporate CLA was used.
--
--# Names should be added to this file like so:
--# Name <email address>
--
--# Please keep the list sorted.
--
--Rob Pike <r at google.com>
--Russ Cox <rsc at swtch.com>
--Sanjay Ghemawat <sanjay at google.com>
--Stefano Rivera <stefano.rivera at gmail.com>
--Srinivasan Venkatachary <vsri at google.com>
-diff --git a/re2/LICENSE b/re2/LICENSE
-deleted file mode 100644
-index 09e5ec1..0000000
---- a/re2/LICENSE
-+++ /dev/null
-@@ -1,27 +0,0 @@
--// Copyright (c) 2009 The RE2 Authors. All rights reserved.
--//
--// Redistribution and use in source and binary forms, with or without
--// modification, are permitted provided that the following conditions are
--// met:
--//
--// * Redistributions of source code must retain the above copyright
--// notice, this list of conditions and the following disclaimer.
--// * Redistributions in binary form must reproduce the above
--// copyright notice, this list of conditions and the following disclaimer
--// in the documentation and/or other materials provided with the
--// distribution.
--// * Neither the name of Google Inc. nor the names of its
--// contributors may be used to endorse or promote products derived from
--// this software without specific prior written permission.
--//
--// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
--// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
--// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
--// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
--// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
--// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
--// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
--// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
--// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
--// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
--// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-diff --git a/re2/Makefile b/re2/Makefile
-deleted file mode 100644
-index 7d91fd5..0000000
---- a/re2/Makefile
-+++ /dev/null
-@@ -1,287 +0,0 @@
--# Copyright 2009 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--all: obj/libre2.a obj/so/libre2.so
--
--# to build against PCRE for testing or benchmarking,
--# uncomment the next two lines
--# CCPCRE=-I/usr/local/include -DUSEPCRE
--# LDPCRE=-L/usr/local/lib -lpcre
--
--#CC=g++
--#CXXFLAGS=-Wall -O3 -g -pthread # can override
--RE2_CXXFLAGS=-Wno-sign-compare -c -I. $(CCPCRE) # required
--#LDFLAGS=-pthread
--AR=ar
--ARFLAGS=rsc
--NM=nm
--NMFLAGS=-p
--
--# Variables mandated by GNU, the arbiter of all good taste on the internet.
--# http://www.gnu.org/prep/standards/standards.html
--prefix=/usr/local
--exec_prefix=$(prefix)
--bindir=$(exec_prefix)/bin
--includedir=$(prefix)/include
--libdir=$(exec_prefix)/lib
--INSTALL=install
--INSTALL_PROGRAM=$(INSTALL)
--INSTALL_DATA=$(INSTALL) -m 644
--
--# ABI version
--# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
--SONAME=0
--
--# To rebuild the Tables generated by Perl and Python scripts (requires Internet
--# access for Unicode data), uncomment the following line:
--# REBUILD_TABLES=1
--
--#ifeq ($(shell uname),Darwin)
--#MAKE_SHARED_LIBRARY=g++ -dynamiclib $(LDFLAGS) -exported_symbols_list libre2.symbols.darwin
--#else
--#MAKE_SHARED_LIBRARY=g++ -shared -Wl,-soname,libre2.so.0,--version-script=libre2.symbols $(LDFLAGS)
--#endif
--
--INSTALL_HFILES=\
-- re2/re2.h\
-- re2/set.h\
-- re2/stringpiece.h\
-- re2/variadic_function.h\
--
--HFILES=\
-- util/arena.h\
-- util/atomicops.h\
-- util/benchmark.h\
-- util/flags.h\
-- util/logging.h\
-- util/mutex.h\
-- util/pcre.h\
-- util/random.h\
-- util/sparse_array.h\
-- util/sparse_set.h\
-- util/test.h\
-- util/utf.h\
-- util/util.h\
-- util/valgrind.h\
-- re2/filtered_re2.h\
-- re2/prefilter.h\
-- re2/prefilter_tree.h\
-- re2/prog.h\
-- re2/re2.h\
-- re2/regexp.h\
-- re2/set.h\
-- re2/stringpiece.h\
-- re2/testing/exhaustive_tester.h\
-- re2/testing/regexp_generator.h\
-- re2/testing/string_generator.h\
-- re2/testing/tester.h\
-- re2/unicode_casefold.h\
-- re2/unicode_groups.h\
-- re2/variadic_function.h\
-- re2/walker-inl.h\
--
--OFILES=\
-- obj/util/arena.o\
-- obj/util/hash.o\
-- obj/util/rune.o\
-- obj/util/stringpiece.o\
-- obj/util/stringprintf.o\
-- obj/util/strutil.o\
-- obj/util/valgrind.o\
-- obj/re2/bitstate.o\
-- obj/re2/compile.o\
-- obj/re2/dfa.o\
-- obj/re2/filtered_re2.o\
-- obj/re2/mimics_pcre.o\
-- obj/re2/nfa.o\
-- obj/re2/onepass.o\
-- obj/re2/parse.o\
-- obj/re2/perl_groups.o\
-- obj/re2/prefilter.o\
-- obj/re2/prefilter_tree.o\
-- obj/re2/prog.o\
-- obj/re2/re2.o\
-- obj/re2/regexp.o\
-- obj/re2/set.o\
-- obj/re2/simplify.o\
-- obj/re2/tostring.o\
-- obj/re2/unicode_casefold.o\
-- obj/re2/unicode_groups.o\
--
--TESTOFILES=\
-- obj/util/pcre.o\
-- obj/util/random.o\
-- obj/util/thread.o\
-- obj/re2/testing/backtrack.o\
-- obj/re2/testing/dump.o\
-- obj/re2/testing/exhaustive_tester.o\
-- obj/re2/testing/null_walker.o\
-- obj/re2/testing/regexp_generator.o\
-- obj/re2/testing/string_generator.o\
-- obj/re2/testing/tester.o\
--
--TESTS=\
-- obj/test/charclass_test\
-- obj/test/compile_test\
-- obj/test/filtered_re2_test\
-- obj/test/mimics_pcre_test\
-- obj/test/parse_test\
-- obj/test/possible_match_test\
-- obj/test/re2_test\
-- obj/test/re2_arg_test\
-- obj/test/regexp_test\
-- obj/test/required_prefix_test\
-- obj/test/search_test\
-- obj/test/set_test\
-- obj/test/simplify_test\
-- obj/test/string_generator_test\
--
--BIGTESTS=\
-- obj/test/dfa_test\
-- obj/test/exhaustive1_test\
-- obj/test/exhaustive2_test\
-- obj/test/exhaustive3_test\
-- obj/test/exhaustive_test\
-- obj/test/random_test\
--
--SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))
--STESTOFILES=$(patsubst obj/%,obj/so/%,$(TESTOFILES))
--STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))
--SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))
--
--DOFILES=$(patsubst obj/%,obj/dbg/%,$(OFILES))
--DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
--DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
--DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
--
--obj:
-- mkdir $@
--
--obj/re2: obj
-- cd obj && mkdir re2 || echo Okay
--
--obj/util: obj
-- cd obj && mkdir util || echo Okay
--
--obj/test: obj
-- cd obj && mkdir test || echo Okay
--
--obj/re2/testing: obj/re2
-- cd obj/re2 && mkdir testing || echo Okay
--
--obj/%.o: obj/re2 obj/re2/testing obj/util %.cc $(HFILES)
-- $(CC) -o $@ $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
--
--obj/dbg/%.o: obj/dbg %.cc $(HFILES)
-- $(CC) -o $@ -fPIC $(CXXFLAGS) $(RE2_CXXFLAGS) $*.cc
--
--obj/so/%.o: obj/so %.cc $(HFILES)
-- $(CC) -o $@ -fPIC $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
--
--obj/%.o: obj %.c $(HFILES)
-- $(CC) -o $@ $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.c
--
--obj/dbg/%.o: obj/dbg %.c $(HFILES)
-- $(CC) -o $@ $(CXXFLAGS) $(RE2_CXXFLAGS) $*.c
--
--obj/so/%.o: obj/so %.c $(HFILES)
-- $(CC) -o $@ -fPIC $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.c
--
--obj/libre2.a: $(OFILES)
-- $(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
--
--obj/dbg/libre2.a: obj/dbg $(DOFILES)
-- $(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
--
--obj/so/libre2.so: obj/so $(SOFILES)
-- $(MAKE_SHARED_LIBRARY) -o $@.0 $(SOFILES)
-- ln -sf libre2.so.0 $@
--
--obj/test/%: obj/test obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
-- $(CC) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(LDFLAGS) $(LDPCRE)
--
--obj/dbg/test/%: obj/dbg/test obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
-- $(CC) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(LDFLAGS) $(LDPCRE)
--
--obj/so/test/%: obj/so/libre2.so obj/libre2.a obj/so/re2/testing/%.o $(STESTOFILES) obj/so/util/test.o
-- $(CC) -o $@ obj/so/re2/testing/$*.o $(STESTOFILES) obj/so/util/test.o -Lobj/so -lre2 obj/libre2.a $(LDFLAGS) $(LDPCRE)
--
--obj/test/regexp_benchmark: obj/test obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
-- $(CC) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(LDFLAGS) $(LDPCRE)
--
--ifdef REBUILD_TABLES
--re2/perl_groups.cc: re2/make_perl_groups.pl
-- perl $< > $@
--
--re2/unicode_%.cc: re2/make_unicode_%.py
-- python $< > $@
--endif
--
--distclean: clean
-- rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
--
--clean:
-- rm -rf obj
-- rm -f re2/*.pyc
--
--testofiles: $(TESTOFILES)
--
--test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
--
--debug-test: $(DTESTS)
-- @echo
-- @echo Running debug binary tests.
-- @echo
-- @./runtests $(DTESTS)
--
--static-test: $(TESTS)
-- @echo
-- @echo Running static binary tests.
-- @echo
-- @./runtests $(TESTS)
--
--shared-test: $(STESTS)
-- @echo
-- @echo Running dynamic binary tests.
-- @echo
-- @LD_LIBRARY_PATH=obj/so:$(LD_LIBRARY_PATH) ./runtests $(STESTS)
--
--debug-bigtest: $(DTESTS) $(DBIGTESTS)
-- @./runtests $(DTESTS) $(DBIGTESTS)
--
--static-bigtest: $(TESTS) $(BIGTESTS)
-- @./runtests $(TESTS) $(BIGTESTS)
--
--shared-bigtest: $(STESTS) $(SBIGTESTS)
-- @LD_LIBRARY_PATH=obj/so:$(LD_LIBRARY_PATH) ./runtests $(STESTS) $(SBIGTESTS)
--
--benchmark: obj/test/regexp_benchmark
--
--install: obj/libre2.a obj/so/libre2.so.0
-- mkdir -p $(DESTDIR)$(includedir)/re2
-- $(INSTALL_DATA) $(DESTDIR)$(INSTALL_HFILES) $(includedir)/re2
-- $(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
-- $(INSTALL) obj/so/libre2.so $(DESTDIR)$(libdir)/libre2.so.$(SONAME).0.0
-- ln -sf libre2.so.$(SONAME).0.0 $(DESTDIR)$(libdir)/libre2.so.$(SONAME)
-- ln -sf libre2.so.$(SONAME).0.0 $(DESTDIR)$(libdir)/libre2.so
--
--testinstall:
-- @mkdir -p obj
-- cp testinstall.cc obj
-- (cd obj && g++ -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -pthread -o testinstall)
-- LD_LIBRARY_PATH=$(DESTDIR)$(libdir) obj/testinstall
--
--benchlog: obj/test/regexp_benchmark
-- (echo '==BENCHMARK==' `hostname` `date`; \
-- (uname -a; g++ --version; hg identify; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
-- echo; \
-- ./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
--
--# Keep gmake from deleting intermediate files it creates.
--# This makes repeated builds faster and preserves debug info on OS X.
--
--.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \
-- obj/dbg/libre2.a obj/so/libre2.a \
-- obj/test/% obj/so/test/% obj/dbg/test/%
--
-diff --git a/re2/README b/re2/README
-deleted file mode 100644
-index 57b3181..0000000
---- a/re2/README
-+++ /dev/null
-@@ -1,19 +0,0 @@
--This is the source code repository for RE2, a regular expression library.
--
--For documentation about how to install and use RE2,
--visit http://code.google.com/p/re2/.
--
--The short version is:
--
--make
--make test
--make install
--make testinstall
--
--Unless otherwise noted, the RE2 source files are distributed
--under the BSD-style license found in the LICENSE file.
--
--RE2's native language is C++.
--An Inferno wrapper is at http://code.google.com/p/inferno-re2/.
--A Python wrapper is at http://github.com/facebook/pyre2/.
--A Ruby wrapper is at http://github.com/axic/rre2/.
-diff --git a/re2/libre2.symbols b/re2/libre2.symbols
-deleted file mode 100644
-index 331765b..0000000
---- a/re2/libre2.symbols
-+++ /dev/null
-@@ -1,15 +0,0 @@
--{
-- global:
-- # re2::RE2*
-- _ZN3re23RE2*;
-- _ZNK3re23RE2*;
-- # re2::StringPiece*
-- _ZN3re211StringPiece*;
-- _ZNK3re211StringPiece*;
-- # operator==(re2::StringPiece const&, re2::StringPiece const&)
-- _ZeqRKN3re211StringPieceES2_;
-- # operator<<(std::ostream&, re2::StringPiece const&)
-- _ZlsRSoRKN3re211StringPieceE;
-- local:
-- *;
--};
-diff --git a/re2/libre2.symbols.darwin b/re2/libre2.symbols.darwin
-deleted file mode 100644
-index 3fcda99..0000000
---- a/re2/libre2.symbols.darwin
-+++ /dev/null
-@@ -1,11 +0,0 @@
--# Linker doesn't like these unmangled:
--# re2::RE2*
--__ZN3re23RE2*
--__ZNK3re23RE2*
--# re2::StringPiece*
--__ZN3re211StringPiece*
--__ZNK3re211StringPiece*
--# operator==(re2::StringPiece const&, re2::StringPiece const&)
--__ZeqRKN3re211StringPieceES2_
--# operator<<(std::ostream&, re2::StringPiece const&)
--__ZlsRSoRKN3re211StringPieceE
-diff --git a/re2/re2/Makefile b/re2/re2/Makefile
-deleted file mode 100644
-index 8b13789..0000000
---- a/re2/re2/Makefile
-+++ /dev/null
-@@ -1 +0,0 @@
--
-diff --git a/re2/re2/bitstate.cc b/re2/re2/bitstate.cc
-deleted file mode 100644
-index 518d642..0000000
---- a/re2/re2/bitstate.cc
-+++ /dev/null
-@@ -1,378 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Tested by search_test.cc, exhaustive_test.cc, tester.cc
--
--// Prog::SearchBitState is a regular expression search with submatch
--// tracking for small regular expressions and texts. Like
--// testing/backtrack.cc, it allocates a bit vector with (length of
--// text) * (length of prog) bits, to make sure it never explores the
--// same (character position, instruction) state multiple times. This
--// limits the search to run in time linear in the length of the text.
--//
--// Unlike testing/backtrack.cc, SearchBitState is not recursive
--// on the text.
--//
--// SearchBitState is a fast replacement for the NFA code on small
--// regexps and texts when SearchOnePass cannot be used.
--
--#include "re2/prog.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct Job {
-- int id;
-- int arg;
-- const char* p;
--};
--
--class BitState {
-- public:
-- explicit BitState(Prog* prog);
-- ~BitState();
--
-- // The usual Search prototype.
-- // Can only call Search once per BitState.
-- bool Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch);
--
-- private:
-- inline bool ShouldVisit(int id, const char* p);
-- void Push(int id, const char* p, int arg);
-- bool GrowStack();
-- bool TrySearch(int id, const char* p);
--
-- // Search parameters
-- Prog* prog_; // program being run
-- StringPiece text_; // text being searched
-- StringPiece context_; // greater context of text being searched
-- bool anchored_; // whether search is anchored at text.begin()
-- bool longest_; // whether search wants leftmost-longest match
-- bool endmatch_; // whether match must end at text.end()
-- StringPiece *submatch_; // submatches to fill in
-- int nsubmatch_; // # of submatches to fill in
--
-- // Search state
-- const char** cap_; // capture registers
-- int ncap_;
--
-- static const int VisitedBits = 32;
-- uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked
-- int nvisited_; // # of words in bitmap
--
-- Job *job_; // stack of text positions to explore
-- int njob_;
-- int maxjob_;
--};
--
--BitState::BitState(Prog* prog)
-- : prog_(prog),
-- anchored_(false),
-- longest_(false),
-- endmatch_(false),
-- submatch_(NULL),
-- nsubmatch_(0),
-- cap_(NULL),
-- ncap_(0),
-- visited_(NULL),
-- nvisited_(0),
-- job_(NULL),
-- njob_(0),
-- maxjob_(0) {
--}
--
--BitState::~BitState() {
-- delete[] visited_;
-- delete[] job_;
-- delete[] cap_;
--}
--
--// Should the search visit the pair ip, p?
--// If so, remember that it was visited so that the next time,
--// we don't repeat the visit.
--bool BitState::ShouldVisit(int id, const char* p) {
-- uint n = id * (text_.size() + 1) + (p - text_.begin());
-- if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
-- return false;
-- visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
-- return true;
--}
--
--// Grow the stack.
--bool BitState::GrowStack() {
-- // VLOG(0) << "Reallocate.";
-- maxjob_ *= 2;
-- Job* newjob = new Job[maxjob_];
-- memmove(newjob, job_, njob_*sizeof job_[0]);
-- delete[] job_;
-- job_ = newjob;
-- if (njob_ >= maxjob_) {
-- LOG(DFATAL) << "Job stack overflow.";
-- return false;
-- }
-- return true;
--}
--
--// Push the triple (id, p, arg) onto the stack, growing it if necessary.
--void BitState::Push(int id, const char* p, int arg) {
-- if (njob_ >= maxjob_) {
-- if (!GrowStack())
-- return;
-- }
-- int op = prog_->inst(id)->opcode();
-- if (op == kInstFail)
-- return;
--
-- // Only check ShouldVisit when arg == 0.
-- // When arg > 0, we are continuing a previous visit.
-- if (arg == 0 && !ShouldVisit(id, p))
-- return;
--
-- Job* j = &job_[njob_++];
-- j->id = id;
-- j->p = p;
-- j->arg = arg;
--}
--
--// Try a search from instruction id0 in state p0.
--// Return whether it succeeded.
--bool BitState::TrySearch(int id0, const char* p0) {
-- bool matched = false;
-- const char* end = text_.end();
-- njob_ = 0;
-- Push(id0, p0, 0);
-- while (njob_ > 0) {
-- // Pop job off stack.
-- --njob_;
-- int id = job_[njob_].id;
-- const char* p = job_[njob_].p;
-- int arg = job_[njob_].arg;
--
-- // Optimization: rather than push and pop,
-- // code that is going to Push and continue
-- // the loop simply updates ip, p, and arg
-- // and jumps to CheckAndLoop. We have to
-- // do the ShouldVisit check that Push
-- // would have, but we avoid the stack
-- // manipulation.
-- if (0) {
-- CheckAndLoop:
-- if (!ShouldVisit(id, p))
-- continue;
-- }
--
-- // Visit ip, p.
-- // VLOG(0) << "Job: " << ip->id() << " "
-- // << (p - text_.begin()) << " " << arg;
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- case kInstFail:
-- default:
-- LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
-- return false;
--
-- case kInstAlt:
-- // Cannot just
-- // Push(ip->out1(), p, 0);
-- // Push(ip->out(), p, 0);
-- // If, during the processing of ip->out(), we encounter
-- // ip->out1() via another path, we want to process it then.
-- // Pushing it here will inhibit that. Instead, re-push
-- // ip with arg==1 as a reminder to push ip->out1() later.
-- switch (arg) {
-- case 0:
-- Push(id, p, 1); // come back when we're done
-- id = ip->out();
-- goto CheckAndLoop;
--
-- case 1:
-- // Finished ip->out(); try ip->out1().
-- arg = 0;
-- id = ip->out1();
-- goto CheckAndLoop;
-- }
-- LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
-- continue;
--
-- case kInstAltMatch:
-- // One opcode is byte range; the other leads to match.
-- if (ip->greedy(prog_)) {
-- // out1 is the match
-- Push(ip->out1(), p, 0);
-- id = ip->out1();
-- p = end;
-- goto CheckAndLoop;
-- }
-- // out is the match - non-greedy
-- Push(ip->out(), end, 0);
-- id = ip->out();
-- goto CheckAndLoop;
--
-- case kInstByteRange: {
-- int c = -1;
-- if (p < end)
-- c = *p & 0xFF;
-- if (ip->Matches(c)) {
-- id = ip->out();
-- p++;
-- goto CheckAndLoop;
-- }
-- continue;
-- }
--
-- case kInstCapture:
-- switch (arg) {
-- case 0:
-- if (0 <= ip->cap() && ip->cap() < ncap_) {
-- // Capture p to register, but save old value.
-- Push(id, cap_[ip->cap()], 1); // come back when we're done
-- cap_[ip->cap()] = p;
-- }
-- // Continue on.
-- id = ip->out();
-- goto CheckAndLoop;
-- case 1:
-- // Finished ip->out(); restore the old value.
-- cap_[ip->cap()] = p;
-- continue;
-- }
-- LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
-- continue;
--
-- case kInstEmptyWidth:
-- if (ip->empty() & ~Prog::EmptyFlags(context_, p))
-- continue;
-- id = ip->out();
-- goto CheckAndLoop;
--
-- case kInstNop:
-- id = ip->out();
-- goto CheckAndLoop;
--
-- case kInstMatch: {
-- if (endmatch_ && p != text_.end())
-- continue;
--
-- // VLOG(0) << "Found match.";
-- // We found a match. If the caller doesn't care
-- // where the match is, no point going further.
-- if (nsubmatch_ == 0)
-- return true;
--
-- // Record best match so far.
-- // Only need to check end point, because this entire
-- // call is only considering one start position.
-- matched = true;
-- cap_[1] = p;
-- if (submatch_[0].data() == NULL ||
-- (longest_ && p > submatch_[0].end())) {
-- for (int i = 0; i < nsubmatch_; i++)
-- submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
-- }
--
-- // If going for first match, we're done.
-- if (!longest_)
-- return true;
--
-- // If we used the entire text, no longer match is possible.
-- if (p == text_.end())
-- return true;
--
-- // Otherwise, continue on in hope of a longer match.
-- continue;
-- }
-- }
-- }
-- return matched;
--}
--
--// Search text (within context) for prog_.
--bool BitState::Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch) {
-- // Search parameters.
-- text_ = text;
-- context_ = context;
-- if (context_.begin() == NULL)
-- context_ = text;
-- if (prog_->anchor_start() && context_.begin() != text.begin())
-- return false;
-- if (prog_->anchor_end() && context_.end() != text.end())
-- return false;
-- anchored_ = anchored || prog_->anchor_start();
-- longest_ = longest || prog_->anchor_end();
-- endmatch_ = prog_->anchor_end();
-- submatch_ = submatch;
-- nsubmatch_ = nsubmatch;
-- for (int i = 0; i < nsubmatch_; i++)
-- submatch_[i] = NULL;
--
-- // Allocate scratch space.
-- nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
-- visited_ = new uint32[nvisited_];
-- memset(visited_, 0, nvisited_*sizeof visited_[0]);
-- // VLOG(0) << "nvisited_ = " << nvisited_;
--
-- ncap_ = 2*nsubmatch;
-- if (ncap_ < 2)
-- ncap_ = 2;
-- cap_ = new const char*[ncap_];
-- memset(cap_, 0, ncap_*sizeof cap_[0]);
--
-- maxjob_ = 256;
-- job_ = new Job[maxjob_];
--
-- // Anchored search must start at text.begin().
-- if (anchored_) {
-- cap_[0] = text.begin();
-- return TrySearch(prog_->start(), text.begin());
-- }
--
-- // Unanchored search, starting from each possible text position.
-- // Notice that we have to try the empty string at the end of
-- // the text, so the loop condition is p <= text.end(), not p < text.end().
-- // This looks like it's quadratic in the size of the text,
-- // but we are not clearing visited_ between calls to TrySearch,
-- // so no work is duplicated and it ends up still being linear.
-- for (const char* p = text.begin(); p <= text.end(); p++) {
-- cap_[0] = p;
-- if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
-- return true;
-- }
-- return false;
--}
--
--// Bit-state search.
--bool Prog::SearchBitState(const StringPiece& text,
-- const StringPiece& context,
-- Anchor anchor,
-- MatchKind kind,
-- StringPiece* match,
-- int nmatch) {
-- // If full match, we ask for an anchored longest match
-- // and then check that match[0] == text.
-- // So make sure match[0] exists.
-- StringPiece sp0;
-- if (kind == kFullMatch) {
-- anchor = kAnchored;
-- if (nmatch < 1) {
-- match = &sp0;
-- nmatch = 1;
-- }
-- }
--
-- // Run the search.
-- BitState b(this);
-- bool anchored = anchor == kAnchored;
-- bool longest = kind != kFirstMatch;
-- if (!b.Search(text, context, anchored, longest, match, nmatch))
-- return false;
-- if (kind == kFullMatch && match[0].end() != text.end())
-- return false;
-- return true;
--}
--
--} // namespace re2
-diff --git a/re2/re2/compile.cc b/re2/re2/compile.cc
-deleted file mode 100644
-index 67c4c2c..0000000
---- a/re2/re2/compile.cc
-+++ /dev/null
-@@ -1,1138 +0,0 @@
--// Copyright 2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Compile regular expression to Prog.
--//
--// Prog and Inst are defined in prog.h.
--// This file's external interface is just Regexp::CompileToProg.
--// The Compiler class defined in this file is private.
--
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--// List of pointers to Inst* that need to be filled in (patched).
--// Because the Inst* haven't been filled in yet,
--// we can use the Inst* word to hold the list's "next" pointer.
--// It's kind of sleazy, but it works well in practice.
--// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
--//
--// Because the out and out1 fields in Inst are no longer pointers,
--// we can't use pointers directly here either. Instead, p refers
--// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
--// p == 0 represents the NULL list. This is okay because instruction #0
--// is always the fail instruction, which never appears on a list.
--
--struct PatchList {
-- uint32 p;
--
-- // Returns patch list containing just p.
-- static PatchList Mk(uint32 p);
--
-- // Patches all the entries on l to have value v.
-- // Caller must not ever use patch list again.
-- static void Patch(Prog::Inst *inst0, PatchList l, uint32 v);
--
-- // Deref returns the next pointer pointed at by p.
-- static PatchList Deref(Prog::Inst *inst0, PatchList l);
--
-- // Appends two patch lists and returns result.
-- static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
--};
--
--static PatchList nullPatchList = { 0 };
--
--// Returns patch list containing just p.
--PatchList PatchList::Mk(uint32 p) {
-- PatchList l;
-- l.p = p;
-- return l;
--}
--
--// Returns the next pointer pointed at by l.
--PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
-- Prog::Inst* ip = &inst0[l.p>>1];
-- if (l.p&1)
-- l.p = ip->out1();
-- else
-- l.p = ip->out();
-- return l;
--}
--
--// Patches all the entries on l to have value v.
--void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32 val) {
-- while (l.p != 0) {
-- Prog::Inst* ip = &inst0[l.p>>1];
-- if (l.p&1) {
-- l.p = ip->out1();
-- ip->out1_ = val;
-- } else {
-- l.p = ip->out();
-- ip->set_out(val);
-- }
-- }
--}
--
--// Appends two patch lists and returns result.
--PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
-- if (l1.p == 0)
-- return l2;
-- if (l2.p == 0)
-- return l1;
--
-- PatchList l = l1;
-- for (;;) {
-- PatchList next = PatchList::Deref(inst0, l);
-- if (next.p == 0)
-- break;
-- l = next;
-- }
--
-- Prog::Inst* ip = &inst0[l.p>>1];
-- if (l.p&1)
-- ip->out1_ = l2.p;
-- else
-- ip->set_out(l2.p);
--
-- return l1;
--}
--
--// Compiled program fragment.
--struct Frag {
-- uint32 begin;
-- PatchList end;
--
-- Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
-- Frag(uint32 begin, PatchList end) : begin(begin), end(end) {}
--};
--
--static Frag kNullFrag;
--
--// Input encodings.
--enum Encoding {
-- kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
-- kEncodingLatin1, // Latin1 (0-FF)
--};
--
--class Compiler : public Regexp::Walker<Frag> {
-- public:
-- explicit Compiler();
-- ~Compiler();
--
-- // Compiles Regexp to a new Prog.
-- // Caller is responsible for deleting Prog when finished with it.
-- // If reversed is true, compiles for walking over the input
-- // string backward (reverses all concatenations).
-- static Prog *Compile(Regexp* re, bool reversed, int64 max_mem);
--
-- // Compiles alternation of all the re to a new Prog.
-- // Each re has a match with an id equal to its index in the vector.
-- static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor,
-- Regexp* re);
--
-- // Interface for Regexp::Walker, which helps traverse the Regexp.
-- // The walk is purely post-recursive: given the machines for the
-- // children, PostVisit combines them to create the machine for
-- // the current node. The child_args are Frags.
-- // The Compiler traverses the Regexp parse tree, visiting
-- // each node in depth-first order. It invokes PreVisit before
-- // visiting the node's children and PostVisit after visiting
-- // the children.
-- Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop);
-- Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args,
-- int nchild_args);
-- Frag ShortVisit(Regexp* re, Frag parent_arg);
-- Frag Copy(Frag arg);
--
-- // Given fragment a, returns a+ or a+?; a* or a*?; a? or a??
-- Frag Plus(Frag a, bool nongreedy);
-- Frag Star(Frag a, bool nongreedy);
-- Frag Quest(Frag a, bool nongreedy);
--
-- // Given fragment a, returns (a) capturing as \n.
-- Frag Capture(Frag a, int n);
--
-- // Given fragments a and b, returns ab; a|b
-- Frag Cat(Frag a, Frag b);
-- Frag Alt(Frag a, Frag b);
--
-- // Returns a fragment that can't match anything.
-- Frag NoMatch();
--
-- // Returns a fragment that matches the empty string.
-- Frag Match(int32 id);
--
-- // Returns a no-op fragment.
-- Frag Nop();
--
-- // Returns a fragment matching the byte range lo-hi.
-- Frag ByteRange(int lo, int hi, bool foldcase);
--
-- // Returns a fragment matching an empty-width special op.
-- Frag EmptyWidth(EmptyOp op);
--
-- // Adds n instructions to the program.
-- // Returns the index of the first one.
-- // Returns -1 if no more instructions are available.
-- int AllocInst(int n);
--
-- // Deletes unused instructions.
-- void Trim();
--
-- // Rune range compiler.
--
-- // Begins a new alternation.
-- void BeginRange();
--
-- // Adds a fragment matching the rune range lo-hi.
-- void AddRuneRange(Rune lo, Rune hi, bool foldcase);
-- void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
-- void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
-- void Add_80_10ffff();
--
-- // New suffix that matches the byte range lo-hi, then goes to next.
-- int RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next);
-- int UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next);
--
-- // Adds a suffix to alternation.
-- void AddSuffix(int id);
--
-- // Returns the alternation of all the added suffixes.
-- Frag EndRange();
--
-- // Single rune.
-- Frag Literal(Rune r, bool foldcase);
--
-- void Setup(Regexp::ParseFlags, int64, RE2::Anchor);
-- Prog* Finish();
--
-- // Returns .* where dot = any byte
-- Frag DotStar();
--
-- private:
-- Prog* prog_; // Program being built.
-- bool failed_; // Did we give up compiling?
-- Encoding encoding_; // Input encoding
-- bool reversed_; // Should program run backward over text?
--
-- int max_inst_; // Maximum number of instructions.
--
-- Prog::Inst* inst_; // Pointer to first instruction.
-- int inst_len_; // Number of instructions used.
-- int inst_cap_; // Number of instructions allocated.
--
-- int64 max_mem_; // Total memory budget.
--
-- map<uint64, int> rune_cache_;
-- Frag rune_range_;
--
-- RE2::Anchor anchor_; // anchor mode for RE2::Set
--
-- DISALLOW_EVIL_CONSTRUCTORS(Compiler);
--};
--
--Compiler::Compiler() {
-- prog_ = new Prog();
-- failed_ = false;
-- encoding_ = kEncodingUTF8;
-- reversed_ = false;
-- inst_ = NULL;
-- inst_len_ = 0;
-- inst_cap_ = 0;
-- max_inst_ = 1; // make AllocInst for fail instruction okay
-- max_mem_ = 0;
-- int fail = AllocInst(1);
-- inst_[fail].InitFail();
-- max_inst_ = 0; // Caller must change
--}
--
--Compiler::~Compiler() {
-- delete prog_;
-- delete[] inst_;
--}
--
--int Compiler::AllocInst(int n) {
-- if (failed_ || inst_len_ + n > max_inst_) {
-- failed_ = true;
-- return -1;
-- }
--
-- if (inst_len_ + n > inst_cap_) {
-- if (inst_cap_ == 0)
-- inst_cap_ = 8;
-- while (inst_len_ + n > inst_cap_)
-- inst_cap_ *= 2;
-- Prog::Inst* ip = new Prog::Inst[inst_cap_];
-- memmove(ip, inst_, inst_len_ * sizeof ip[0]);
-- memset(ip + inst_len_, 0, (inst_cap_ - inst_len_) * sizeof ip[0]);
-- delete[] inst_;
-- inst_ = ip;
-- }
-- int id = inst_len_;
-- inst_len_ += n;
-- return id;
--}
--
--void Compiler::Trim() {
-- if (inst_len_ < inst_cap_) {
-- Prog::Inst* ip = new Prog::Inst[inst_len_];
-- memmove(ip, inst_, inst_len_ * sizeof ip[0]);
-- delete[] inst_;
-- inst_ = ip;
-- inst_cap_ = inst_len_;
-- }
--}
--
--// These routines are somewhat hard to visualize in text --
--// see http://swtch.com/~rsc/regexp/regexp1.html for
--// pictures explaining what is going on here.
--
--// Returns an unmatchable fragment.
--Frag Compiler::NoMatch() {
-- return Frag(0, nullPatchList);
--}
--
--// Is a an unmatchable fragment?
--static bool IsNoMatch(Frag a) {
-- return a.begin == 0;
--}
--
--// Given fragments a and b, returns fragment for ab.
--Frag Compiler::Cat(Frag a, Frag b) {
-- if (IsNoMatch(a) || IsNoMatch(b))
-- return NoMatch();
--
-- // Elide no-op.
-- Prog::Inst* begin = &inst_[a.begin];
-- if (begin->opcode() == kInstNop &&
-- a.end.p == (a.begin << 1) &&
-- begin->out() == 0) {
-- PatchList::Patch(inst_, a.end, b.begin); // in case refs to a somewhere
-- return b;
-- }
--
-- // To run backward over string, reverse all concatenations.
-- if (reversed_) {
-- PatchList::Patch(inst_, b.end, a.begin);
-- return Frag(b.begin, a.end);
-- }
--
-- PatchList::Patch(inst_, a.end, b.begin);
-- return Frag(a.begin, b.end);
--}
--
--// Given fragments for a and b, returns fragment for a|b.
--Frag Compiler::Alt(Frag a, Frag b) {
-- // Special case for convenience in loops.
-- if (IsNoMatch(a))
-- return b;
-- if (IsNoMatch(b))
-- return a;
--
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
--
-- inst_[id].InitAlt(a.begin, b.begin);
-- return Frag(id, PatchList::Append(inst_, a.end, b.end));
--}
--
--// When capturing submatches in like-Perl mode, a kInstAlt Inst
--// treats out_ as the first choice, out1_ as the second.
--//
--// For *, +, and ?, if out_ causes another repetition,
--// then the operator is greedy. If out1_ is the repetition
--// (and out_ moves forward), then the operator is non-greedy.
--
--// Given a fragment a, returns a fragment for a* or a*? (if nongreedy)
--Frag Compiler::Star(Frag a, bool nongreedy) {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitAlt(0, 0);
-- PatchList::Patch(inst_, a.end, id);
-- if (nongreedy) {
-- inst_[id].out1_ = a.begin;
-- return Frag(id, PatchList::Mk(id << 1));
-- } else {
-- inst_[id].set_out(a.begin);
-- return Frag(id, PatchList::Mk((id << 1) | 1));
-- }
--}
--
--// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
--Frag Compiler::Plus(Frag a, bool nongreedy) {
-- // a+ is just a* with a different entry point.
-- Frag f = Star(a, nongreedy);
-- return Frag(a.begin, f.end);
--}
--
--// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
--Frag Compiler::Quest(Frag a, bool nongreedy) {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- PatchList pl;
-- if (nongreedy) {
-- inst_[id].InitAlt(0, a.begin);
-- pl = PatchList::Mk(id << 1);
-- } else {
-- inst_[id].InitAlt(a.begin, 0);
-- pl = PatchList::Mk((id << 1) | 1);
-- }
-- return Frag(id, PatchList::Append(inst_, pl, a.end));
--}
--
--// Returns a fragment for the byte range lo-hi.
--Frag Compiler::ByteRange(int lo, int hi, bool foldcase) {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitByteRange(lo, hi, foldcase, 0);
-- prog_->byte_inst_count_++;
-- prog_->MarkByteRange(lo, hi);
-- if (foldcase && lo <= 'z' && hi >= 'a') {
-- if (lo < 'a')
-- lo = 'a';
-- if (hi > 'z')
-- hi = 'z';
-- if (lo <= hi)
-- prog_->MarkByteRange(lo + 'A' - 'a', hi + 'A' - 'a');
-- }
-- return Frag(id, PatchList::Mk(id << 1));
--}
--
--// Returns a no-op fragment. Sometimes unavoidable.
--Frag Compiler::Nop() {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitNop(0);
-- return Frag(id, PatchList::Mk(id << 1));
--}
--
--// Returns a fragment that signals a match.
--Frag Compiler::Match(int32 match_id) {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitMatch(match_id);
-- return Frag(id, nullPatchList);
--}
--
--// Returns a fragment matching a particular empty-width op (like ^ or $)
--Frag Compiler::EmptyWidth(EmptyOp empty) {
-- int id = AllocInst(1);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitEmptyWidth(empty, 0);
-- if (empty & (kEmptyBeginLine|kEmptyEndLine))
-- prog_->MarkByteRange('\n', '\n');
-- if (empty & (kEmptyWordBoundary|kEmptyNonWordBoundary)) {
-- int j;
-- for (int i = 0; i < 256; i = j) {
-- for (j = i+1; j < 256 && Prog::IsWordChar(i) == Prog::IsWordChar(j); j++)
-- ;
-- prog_->MarkByteRange(i, j-1);
-- }
-- }
-- return Frag(id, PatchList::Mk(id << 1));
--}
--
--// Given a fragment a, returns a fragment with capturing parens around a.
--Frag Compiler::Capture(Frag a, int n) {
-- int id = AllocInst(2);
-- if (id < 0)
-- return NoMatch();
-- inst_[id].InitCapture(2*n, a.begin);
-- inst_[id+1].InitCapture(2*n+1, 0);
-- PatchList::Patch(inst_, a.end, id+1);
--
-- return Frag(id, PatchList::Mk((id+1) << 1));
--}
--
--// A Rune is a name for a Unicode code point.
--// Returns maximum rune encoded by UTF-8 sequence of length len.
--static int MaxRune(int len) {
-- int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
-- if (len == 1)
-- b = 7;
-- else
-- b = 8-(len+1) + 6*(len-1);
-- return (1<<b) - 1; // maximum Rune for b bits.
--}
--
--// The rune range compiler caches common suffix fragments,
--// which are very common in UTF-8 (e.g., [80-bf]).
--// The fragment suffixes are identified by their start
--// instructions. NULL denotes the eventual end match.
--// The Frag accumulates in rune_range_. Caching common
--// suffixes reduces the UTF-8 "." from 32 to 24 instructions,
--// and it reduces the corresponding one-pass NFA from 16 nodes to 8.
--
--void Compiler::BeginRange() {
-- rune_cache_.clear();
-- rune_range_.begin = 0;
-- rune_range_.end = nullPatchList;
--}
--
--int Compiler::UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase,
-- int next) {
-- Frag f = ByteRange(lo, hi, foldcase);
-- if (next != 0) {
-- PatchList::Patch(inst_, f.end, next);
-- } else {
-- rune_range_.end = PatchList::Append(inst_, rune_range_.end, f.end);
-- }
-- return f.begin;
--}
--
--int Compiler::RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next) {
-- // In Latin1 mode, there's no point in caching.
-- // In forward UTF-8 mode, only need to cache continuation bytes.
-- if (encoding_ == kEncodingLatin1 ||
-- (encoding_ == kEncodingUTF8 &&
-- !reversed_ &&
-- !(0x80 <= lo && hi <= 0xbf))) {
-- return UncachedRuneByteSuffix(lo, hi, foldcase, next);
-- }
--
-- uint64 key = ((uint64)next << 17) | (lo<<9) | (hi<<1) | foldcase;
-- map<uint64, int>::iterator it = rune_cache_.find(key);
-- if (it != rune_cache_.end())
-- return it->second;
-- int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
-- rune_cache_[key] = id;
-- return id;
--}
--
--void Compiler::AddSuffix(int id) {
-- if (rune_range_.begin == 0) {
-- rune_range_.begin = id;
-- return;
-- }
--
-- int alt = AllocInst(1);
-- if (alt < 0) {
-- rune_range_.begin = 0;
-- return;
-- }
-- inst_[alt].InitAlt(rune_range_.begin, id);
-- rune_range_.begin = alt;
--}
--
--Frag Compiler::EndRange() {
-- return rune_range_;
--}
--
--// Converts rune range lo-hi into a fragment that recognizes
--// the bytes that would make up those runes in the current
--// encoding (Latin 1 or UTF-8).
--// This lets the machine work byte-by-byte even when
--// using multibyte encodings.
--
--void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) {
-- switch (encoding_) {
-- default:
-- case kEncodingUTF8:
-- AddRuneRangeUTF8(lo, hi, foldcase);
-- break;
-- case kEncodingLatin1:
-- AddRuneRangeLatin1(lo, hi, foldcase);
-- break;
-- }
--}
--
--void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
-- // Latin1 is easy: runes *are* bytes.
-- if (lo > hi || lo > 0xFF)
-- return;
-- if (hi > 0xFF)
-- hi = 0xFF;
-- AddSuffix(RuneByteSuffix(lo, hi, foldcase, 0));
--}
--
--// Table describing how to make a UTF-8 matching machine
--// for the rune range 80-10FFFF (Runeself-Runemax).
--// This range happens frequently enough (for example /./ and /[^a-z]/)
--// and the rune_cache_ map is slow enough that this is worth
--// special handling. Makes compilation of a small expression
--// with a dot in it about 10% faster.
--// The * in the comments below mark whole sequences.
--static struct ByteRangeProg {
-- int next;
-- int lo;
-- int hi;
--} prog_80_10ffff[] = {
-- // Two-byte
-- { -1, 0x80, 0xBF, }, // 0: 80-BF
-- { 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF*
--
-- // Three-byte
-- { 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF
-- { 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF*
-- { 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF
-- { 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF*
--
-- // Four-byte
-- { 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF
-- { 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF*
-- { 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF
-- { 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF*
-- { 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF
-- { 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF*
--};
--
--void Compiler::Add_80_10ffff() {
-- int inst[arraysize(prog_80_10ffff)];
-- for (int i = 0; i < arraysize(prog_80_10ffff); i++) {
-- const ByteRangeProg& p = prog_80_10ffff[i];
-- int next = 0;
-- if (p.next >= 0)
-- next = inst[p.next];
-- inst[i] = UncachedRuneByteSuffix(p.lo, p.hi, false, next);
-- if ((p.lo & 0xC0) != 0x80)
-- AddSuffix(inst[i]);
-- }
--}
--
--void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
-- if (lo > hi)
-- return;
--
-- // Pick off 80-10FFFF as a common special case
-- // that can bypass the slow rune_cache_.
-- if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
-- Add_80_10ffff();
-- return;
-- }
--
-- // Split range into same-length sized ranges.
-- for (int i = 1; i < UTFmax; i++) {
-- Rune max = MaxRune(i);
-- if (lo <= max && max < hi) {
-- AddRuneRangeUTF8(lo, max, foldcase);
-- AddRuneRangeUTF8(max+1, hi, foldcase);
-- return;
-- }
-- }
--
-- // ASCII range is always a special case.
-- if (hi < Runeself) {
-- AddSuffix(RuneByteSuffix(lo, hi, foldcase, 0));
-- return;
-- }
--
-- // Split range into sections that agree on leading bytes.
-- for (int i = 1; i < UTFmax; i++) {
-- uint m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence
-- if ((lo & ~m) != (hi & ~m)) {
-- if ((lo & m) != 0) {
-- AddRuneRangeUTF8(lo, lo|m, foldcase);
-- AddRuneRangeUTF8((lo|m)+1, hi, foldcase);
-- return;
-- }
-- if ((hi & m) != m) {
-- AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase);
-- AddRuneRangeUTF8(hi&~m, hi, foldcase);
-- return;
-- }
-- }
-- }
--
-- // Finally. Generate byte matching equivalent for lo-hi.
-- uint8 ulo[UTFmax], uhi[UTFmax];
-- int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
-- int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
-- (void)m; // USED(m)
-- DCHECK_EQ(n, m);
--
-- int id = 0;
-- if (reversed_) {
-- for (int i = 0; i < n; i++)
-- id = RuneByteSuffix(ulo[i], uhi[i], false, id);
-- } else {
-- for (int i = n-1; i >= 0; i--)
-- id = RuneByteSuffix(ulo[i], uhi[i], false, id);
-- }
-- AddSuffix(id);
--}
--
--// Should not be called.
--Frag Compiler::Copy(Frag arg) {
-- // We're using WalkExponential; there should be no copying.
-- LOG(DFATAL) << "Compiler::Copy called!";
-- failed_ = true;
-- return NoMatch();
--}
--
--// Visits a node quickly; called once WalkExponential has
--// decided to cut this walk short.
--Frag Compiler::ShortVisit(Regexp* re, Frag) {
-- failed_ = true;
-- return NoMatch();
--}
--
--// Called before traversing a node's children during the walk.
--Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) {
-- // Cut off walk if we've already failed.
-- if (failed_)
-- *stop = true;
--
-- return kNullFrag; // not used by caller
--}
--
--Frag Compiler::Literal(Rune r, bool foldcase) {
-- switch (encoding_) {
-- default:
-- return kNullFrag;
--
-- case kEncodingLatin1:
-- return ByteRange(r, r, foldcase);
--
-- case kEncodingUTF8: {
-- if (r < Runeself) // Make common case fast.
-- return ByteRange(r, r, foldcase);
-- uint8 buf[UTFmax];
-- int n = runetochar(reinterpret_cast<char*>(buf), &r);
-- Frag f = ByteRange((uint8)buf[0], buf[0], false);
-- for (int i = 1; i < n; i++)
-- f = Cat(f, ByteRange((uint8)buf[i], buf[i], false));
-- return f;
-- }
-- }
--}
--
--// Called after traversing the node's children during the walk.
--// Given their frags, build and return the frag for this re.
--Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
-- int nchild_frags) {
-- // If a child failed, don't bother going forward, especially
-- // since the child_frags might contain Frags with NULLs in them.
-- if (failed_)
-- return NoMatch();
--
-- // Given the child fragments, return the fragment for this node.
-- switch (re->op()) {
-- case kRegexpRepeat:
-- // Should not see; code at bottom of function will print error
-- break;
--
-- case kRegexpNoMatch:
-- return NoMatch();
--
-- case kRegexpEmptyMatch:
-- return Nop();
--
-- case kRegexpHaveMatch: {
-- Frag f = Match(re->match_id());
-- // Remember unanchored match to end of string.
-- if (anchor_ != RE2::ANCHOR_BOTH)
-- f = Cat(DotStar(), f);
-- return f;
-- }
--
-- case kRegexpConcat: {
-- Frag f = child_frags[0];
-- for (int i = 1; i < nchild_frags; i++)
-- f = Cat(f, child_frags[i]);
-- return f;
-- }
--
-- case kRegexpAlternate: {
-- Frag f = child_frags[0];
-- for (int i = 1; i < nchild_frags; i++)
-- f = Alt(f, child_frags[i]);
-- return f;
-- }
--
-- case kRegexpStar:
-- return Star(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
--
-- case kRegexpPlus:
-- return Plus(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
--
-- case kRegexpQuest:
-- return Quest(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
--
-- case kRegexpLiteral:
-- return Literal(re->rune(), re->parse_flags()&Regexp::FoldCase);
--
-- case kRegexpLiteralString: {
-- // Concatenation of literals.
-- if (re->nrunes() == 0)
-- return Nop();
-- Frag f;
-- for (int i = 0; i < re->nrunes(); i++) {
-- Frag f1 = Literal(re->runes()[i], re->parse_flags()&Regexp::FoldCase);
-- if (i == 0)
-- f = f1;
-- else
-- f = Cat(f, f1);
-- }
-- return f;
-- }
--
-- case kRegexpAnyChar:
-- BeginRange();
-- AddRuneRange(0, Runemax, false);
-- return EndRange();
--
-- case kRegexpAnyByte:
-- return ByteRange(0x00, 0xFF, false);
--
-- case kRegexpCharClass: {
-- CharClass* cc = re->cc();
-- if (cc->empty()) {
-- // This can't happen.
-- LOG(DFATAL) << "No ranges in char class";
-- failed_ = true;
-- return NoMatch();
-- }
--
-- // ASCII case-folding optimization: if the char class
-- // behaves the same on A-Z as it does on a-z,
-- // discard any ranges wholly contained in A-Z
-- // and mark the other ranges as foldascii.
-- // This reduces the size of a program for
-- // (?i)abc from 3 insts per letter to 1 per letter.
-- bool foldascii = cc->FoldsASCII();
--
-- // Character class is just a big OR of the different
-- // character ranges in the class.
-- BeginRange();
-- for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) {
-- // ASCII case-folding optimization (see above).
-- if (foldascii && 'A' <= i->lo && i->hi <= 'Z')
-- continue;
--
-- // If this range contains all of A-Za-z or none of it,
-- // the fold flag is unnecessary; don't bother.
-- bool fold = foldascii;
-- if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo)
-- fold = false;
--
-- AddRuneRange(i->lo, i->hi, fold);
-- }
-- return EndRange();
-- }
--
-- case kRegexpCapture:
-- // If this is a non-capturing parenthesis -- (?:foo) --
-- // just use the inner expression.
-- if (re->cap() < 0)
-- return child_frags[0];
-- return Capture(child_frags[0], re->cap());
--
-- case kRegexpBeginLine:
-- return EmptyWidth(reversed_ ? kEmptyEndLine : kEmptyBeginLine);
--
-- case kRegexpEndLine:
-- return EmptyWidth(reversed_ ? kEmptyBeginLine : kEmptyEndLine);
--
-- case kRegexpBeginText:
-- return EmptyWidth(reversed_ ? kEmptyEndText : kEmptyBeginText);
--
-- case kRegexpEndText:
-- return EmptyWidth(reversed_ ? kEmptyBeginText : kEmptyEndText);
--
-- case kRegexpWordBoundary:
-- return EmptyWidth(kEmptyWordBoundary);
--
-- case kRegexpNoWordBoundary:
-- return EmptyWidth(kEmptyNonWordBoundary);
-- }
-- LOG(DFATAL) << "Missing case in Compiler: " << re->op();
-- failed_ = true;
-- return NoMatch();
--}
--
--// Is this regexp required to start at the beginning of the text?
--// Only approximate; can return false for complicated regexps like (\Aa|\Ab),
--// but handles (\A(a|b)). Could use the Walker to write a more exact one.
--static bool IsAnchorStart(Regexp** pre, int depth) {
-- Regexp* re = *pre;
-- Regexp* sub;
-- // The depth limit makes sure that we don't overflow
-- // the stack on a deeply nested regexp. As the comment
-- // above says, IsAnchorStart is conservative, so returning
-- // a false negative is okay. The exact limit is somewhat arbitrary.
-- if (re == NULL || depth >= 4)
-- return false;
-- switch (re->op()) {
-- default:
-- break;
-- case kRegexpConcat:
-- if (re->nsub() > 0) {
-- sub = re->sub()[0]->Incref();
-- if (IsAnchorStart(&sub, depth+1)) {
-- Regexp** subcopy = new Regexp*[re->nsub()];
-- subcopy[0] = sub; // already have reference
-- for (int i = 1; i < re->nsub(); i++)
-- subcopy[i] = re->sub()[i]->Incref();
-- *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags());
-- delete[] subcopy;
-- re->Decref();
-- return true;
-- }
-- sub->Decref();
-- }
-- break;
-- case kRegexpCapture:
-- sub = re->sub()[0]->Incref();
-- if (IsAnchorStart(&sub, depth+1)) {
-- *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
-- re->Decref();
-- return true;
-- }
-- sub->Decref();
-- break;
-- case kRegexpBeginText:
-- *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
-- re->Decref();
-- return true;
-- }
-- return false;
--}
--
--// Is this regexp required to end at the end of the text?
--// Only approximate; can return false for complicated regexps like (a\z|b\z),
--// but handles ((a|b)\z). Could use the Walker to write a more exact one.
--static bool IsAnchorEnd(Regexp** pre, int depth) {
-- Regexp* re = *pre;
-- Regexp* sub;
-- // The depth limit makes sure that we don't overflow
-- // the stack on a deeply nested regexp. As the comment
-- // above says, IsAnchorEnd is conservative, so returning
-- // a false negative is okay. The exact limit is somewhat arbitrary.
-- if (re == NULL || depth >= 4)
-- return false;
-- switch (re->op()) {
-- default:
-- break;
-- case kRegexpConcat:
-- if (re->nsub() > 0) {
-- sub = re->sub()[re->nsub() - 1]->Incref();
-- if (IsAnchorEnd(&sub, depth+1)) {
-- Regexp** subcopy = new Regexp*[re->nsub()];
-- subcopy[re->nsub() - 1] = sub; // already have reference
-- for (int i = 0; i < re->nsub() - 1; i++)
-- subcopy[i] = re->sub()[i]->Incref();
-- *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags());
-- delete[] subcopy;
-- re->Decref();
-- return true;
-- }
-- sub->Decref();
-- }
-- break;
-- case kRegexpCapture:
-- sub = re->sub()[0]->Incref();
-- if (IsAnchorEnd(&sub, depth+1)) {
-- *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
-- re->Decref();
-- return true;
-- }
-- sub->Decref();
-- break;
-- case kRegexpEndText:
-- *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
-- re->Decref();
-- return true;
-- }
-- return false;
--}
--
--void Compiler::Setup(Regexp::ParseFlags flags, int64 max_mem,
-- RE2::Anchor anchor) {
-- prog_->set_flags(flags);
--
-- if (flags & Regexp::Latin1)
-- encoding_ = kEncodingLatin1;
-- max_mem_ = max_mem;
-- if (max_mem <= 0) {
-- max_inst_ = 100000; // more than enough
-- } else if (max_mem <= sizeof(Prog)) {
-- // No room for anything.
-- max_inst_ = 0;
-- } else {
-- int64 m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
-- // Limit instruction count so that inst->id() fits nicely in an int.
-- // SparseArray also assumes that the indices (inst->id()) are ints.
-- // The call to WalkExponential uses 2*max_inst_ below,
-- // and other places in the code use 2 or 3 * prog->size().
-- // Limiting to 2^24 should avoid overflow in those places.
-- // (The point of allowing more than 32 bits of memory is to
-- // have plenty of room for the DFA states, not to use it up
-- // on the program.)
-- if (m >= 1<<24)
-- m = 1<<24;
--
-- // Inst imposes its own limit (currently bigger than 2^24 but be safe).
-- if (m > Prog::Inst::kMaxInst)
-- m = Prog::Inst::kMaxInst;
--
-- max_inst_ = m;
-- }
--
-- anchor_ = anchor;
--}
--
--// Compiles re, returning program.
--// Caller is responsible for deleting prog_.
--// If reversed is true, compiles a program that expects
--// to run over the input string backward (reverses all concatenations).
--// The reversed flag is also recorded in the returned program.
--Prog* Compiler::Compile(Regexp* re, bool reversed, int64 max_mem) {
-- Compiler c;
--
-- c.Setup(re->parse_flags(), max_mem, RE2::ANCHOR_BOTH /* unused */);
-- c.reversed_ = reversed;
--
-- // Simplify to remove things like counted repetitions
-- // and character classes like \d.
-- Regexp* sre = re->Simplify();
-- if (sre == NULL)
-- return NULL;
--
-- // Record whether prog is anchored, removing the anchors.
-- // (They get in the way of other optimizations.)
-- bool is_anchor_start = IsAnchorStart(&sre, 0);
-- bool is_anchor_end = IsAnchorEnd(&sre, 0);
--
-- // Generate fragment for entire regexp.
-- Frag f = c.WalkExponential(sre, kNullFrag, 2*c.max_inst_);
-- sre->Decref();
-- if (c.failed_)
-- return NULL;
--
-- // Success! Finish by putting Match node at end, and record start.
-- // Turn off c.reversed_ (if it is set) to force the remaining concatenations
-- // to behave normally.
-- c.reversed_ = false;
-- Frag all = c.Cat(f, c.Match(0));
-- c.prog_->set_start(all.begin);
--
-- if (reversed) {
-- c.prog_->set_anchor_start(is_anchor_end);
-- c.prog_->set_anchor_end(is_anchor_start);
-- } else {
-- c.prog_->set_anchor_start(is_anchor_start);
-- c.prog_->set_anchor_end(is_anchor_end);
-- }
--
-- // Also create unanchored version, which starts with a .*? loop.
-- if (c.prog_->anchor_start()) {
-- c.prog_->set_start_unanchored(c.prog_->start());
-- } else {
-- Frag unanchored = c.Cat(c.DotStar(), all);
-- c.prog_->set_start_unanchored(unanchored.begin);
-- }
--
-- c.prog_->set_reversed(reversed);
--
-- // Hand ownership of prog_ to caller.
-- return c.Finish();
--}
--
--Prog* Compiler::Finish() {
-- if (failed_)
-- return NULL;
--
-- if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
-- // No possible matches; keep Fail instruction only.
-- inst_len_ = 1;
-- }
--
-- // Trim instruction to minimum array and transfer to Prog.
-- Trim();
-- prog_->inst_ = inst_;
-- prog_->size_ = inst_len_;
-- inst_ = NULL;
--
-- // Compute byte map.
-- prog_->ComputeByteMap();
--
-- prog_->Optimize();
--
-- // Record remaining memory for DFA.
-- if (max_mem_ <= 0) {
-- prog_->set_dfa_mem(1<<20);
-- } else {
-- int64 m = max_mem_ - sizeof(Prog) - inst_len_*sizeof(Prog::Inst);
-- if (m < 0)
-- m = 0;
-- prog_->set_dfa_mem(m);
-- }
--
-- Prog* p = prog_;
-- prog_ = NULL;
-- return p;
--}
--
--// Converts Regexp to Prog.
--Prog* Regexp::CompileToProg(int64 max_mem) {
-- return Compiler::Compile(this, false, max_mem);
--}
--
--Prog* Regexp::CompileToReverseProg(int64 max_mem) {
-- return Compiler::Compile(this, true, max_mem);
--}
--
--Frag Compiler::DotStar() {
-- return Star(ByteRange(0x00, 0xff, false), true);
--}
--
--// Compiles RE set to Prog.
--Prog* Compiler::CompileSet(const RE2::Options& options, RE2::Anchor anchor,
-- Regexp* re) {
-- Compiler c;
--
-- Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(options.ParseFlags());
-- c.Setup(pf, options.max_mem(), anchor);
--
-- // Compile alternation of fragments.
-- Frag all = c.WalkExponential(re, kNullFrag, 2*c.max_inst_);
-- re->Decref();
-- if (c.failed_)
-- return NULL;
--
-- if (anchor == RE2::UNANCHORED) {
-- // The trailing .* was added while handling kRegexpHaveMatch.
-- // We just have to add the leading one.
-- all = c.Cat(c.DotStar(), all);
-- }
--
-- c.prog_->set_start(all.begin);
-- c.prog_->set_start_unanchored(all.begin);
-- c.prog_->set_anchor_start(true);
-- c.prog_->set_anchor_end(true);
--
-- Prog* prog = c.Finish();
-- if (prog == NULL)
-- return NULL;
--
-- // Make sure DFA has enough memory to operate,
-- // since we're not going to fall back to the NFA.
-- bool failed;
-- StringPiece sp = "hello, world";
-- prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
-- NULL, &failed, NULL);
-- if (failed) {
-- delete prog;
-- return NULL;
-- }
--
-- return prog;
--}
--
--Prog* Prog::CompileSet(const RE2::Options& options, RE2::Anchor anchor,
-- Regexp* re) {
-- return Compiler::CompileSet(options, anchor, re);
--}
--
--} // namespace re2
-diff --git a/re2/re2/dfa.cc b/re2/re2/dfa.cc
-deleted file mode 100644
-index 7d206fb..0000000
---- a/re2/re2/dfa.cc
-+++ /dev/null
-@@ -1,2086 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// A DFA (deterministic finite automaton)-based regular expression search.
--//
--// The DFA search has two main parts: the construction of the automaton,
--// which is represented by a graph of State structures, and the execution
--// of the automaton over a given input string.
--//
--// The basic idea is that the State graph is constructed so that the
--// execution can simply start with a state s, and then for each byte c in
--// the input string, execute "s = s->next[c]", checking at each point whether
--// the current s represents a matching state.
--//
--// The simple explanation just given does convey the essence of this code,
--// but it omits the details of how the State graph gets constructed as well
--// as some performance-driven optimizations to the execution of the automaton.
--// All these details are explained in the comments for the code following
--// the definition of class DFA.
--//
--// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
--
--#include "re2/prog.h"
--#include "re2/stringpiece.h"
--#include "util/atomicops.h"
--#include "util/flags.h"
--#include "util/sparse_set.h"
--
--DEFINE_bool(re2_dfa_bail_when_slow, true,
-- "Whether the RE2 DFA should bail out early "
-- "if the NFA would be faster (for testing).");
--
--namespace re2 {
--
--#if !defined(__linux__) /* only Linux seems to have memrchr */
--static void* memrchr(const void* s, int c, size_t n) {
-- const unsigned char* p = (const unsigned char*)s;
-- for (p += n; n > 0; n--)
-- if (*--p == c)
-- return (void*)p;
--
-- return NULL;
--}
--#endif
--
--// Changing this to true compiles in prints that trace execution of the DFA.
--// Generates a lot of output -- only useful for debugging.
--static const bool DebugDFA = false;
--
--// A DFA implementation of a regular expression program.
--// Since this is entirely a forward declaration mandated by C++,
--// some of the comments here are better understood after reading
--// the comments in the sections that follow the DFA definition.
--class DFA {
-- public:
-- DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem);
-- ~DFA();
-- bool ok() const { return !init_failed_; }
-- Prog::MatchKind kind() { return kind_; }
--
-- // Searches for the regular expression in text, which is considered
-- // as a subsection of context for the purposes of interpreting flags
-- // like ^ and $ and \A and \z.
-- // Returns whether a match was found.
-- // If a match is found, sets *ep to the end point of the best match in text.
-- // If "anchored", the match must begin at the start of text.
-- // If "want_earliest_match", the match that ends first is used, not
-- // necessarily the best one.
-- // If "run_forward" is true, the DFA runs from text.begin() to text.end().
-- // If it is false, the DFA runs from text.end() to text.begin(),
-- // returning the leftmost end of the match instead of the rightmost one.
-- // If the DFA cannot complete the search (for example, if it is out of
-- // memory), it sets *failed and returns false.
-- bool Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool want_earliest_match, bool run_forward,
-- bool* failed, const char** ep, vector<int>* matches);
--
-- // Builds out all states for the entire DFA. FOR TESTING ONLY
-- // Returns number of states.
-- int BuildAllStates();
--
-- // Computes min and max for matching strings. Won't return strings
-- // bigger than maxlen.
-- bool PossibleMatchRange(string* min, string* max, int maxlen);
--
-- // These data structures are logically private, but C++ makes it too
-- // difficult to mark them as such.
-- class Workq;
-- class RWLocker;
-- class StateSaver;
--
-- // A single DFA state. The DFA is represented as a graph of these
-- // States, linked by the next_ pointers. If in state s and reading
-- // byte c, the next state should be s->next_[c].
-- struct State {
-- inline bool IsMatch() const { return flag_ & kFlagMatch; }
-- void SaveMatch(vector<int>* v);
--
-- int* inst_; // Instruction pointers in the state.
-- int ninst_; // # of inst_ pointers.
-- uint flag_; // Empty string bitfield flags in effect on the way
-- // into this state, along with kFlagMatch if this
-- // is a matching state.
-- State** next_; // Outgoing arrows from State,
-- // one per input byte class
-- };
--
-- enum {
-- kByteEndText = 256, // imaginary byte at end of text
--
-- kFlagEmptyMask = 0xFFF, // State.flag_: bits holding kEmptyXXX flags
-- kFlagMatch = 0x1000, // State.flag_: this is a matching state
-- kFlagLastWord = 0x2000, // State.flag_: last byte was a word char
-- kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left
-- };
--
-- // STL function structures for use with unordered_set.
-- struct StateEqual {
-- bool operator()(const State* a, const State* b) const {
-- if (a == b)
-- return true;
-- if (a == NULL || b == NULL)
-- return false;
-- if (a->ninst_ != b->ninst_)
-- return false;
-- if (a->flag_ != b->flag_)
-- return false;
-- for (int i = 0; i < a->ninst_; i++)
-- if (a->inst_[i] != b->inst_[i])
-- return false;
-- return true; // they're equal
-- }
-- };
-- struct StateHash {
-- size_t operator()(const State* a) const {
-- if (a == NULL)
-- return 0;
-- const char* s = reinterpret_cast<const char*>(a->inst_);
-- int len = a->ninst_ * sizeof a->inst_[0];
-- if (sizeof(size_t) == sizeof(uint32))
-- return Hash32StringWithSeed(s, len, a->flag_);
-- else
-- return Hash64StringWithSeed(s, len, a->flag_);
-- }
-- };
--
-- typedef unordered_set<State*, StateHash, StateEqual> StateSet;
--
--
-- private:
-- // Special "firstbyte" values for a state. (Values >= 0 denote actual bytes.)
-- enum {
-- kFbUnknown = -1, // No analysis has been performed.
-- kFbMany = -2, // Many bytes will lead out of this state.
-- kFbNone = -3, // No bytes lead out of this state.
-- };
--
-- enum {
-- // Indices into start_ for unanchored searches.
-- // Add kStartAnchored for anchored searches.
-- kStartBeginText = 0, // text at beginning of context
-- kStartBeginLine = 2, // text at beginning of line
-- kStartAfterWordChar = 4, // text follows a word character
-- kStartAfterNonWordChar = 6, // text follows non-word character
-- kMaxStart = 8,
--
-- kStartAnchored = 1,
-- };
--
-- // Resets the DFA State cache, flushing all saved State* information.
-- // Releases and reacquires cache_mutex_ via cache_lock, so any
-- // State* existing before the call are not valid after the call.
-- // Use a StateSaver to preserve important states across the call.
-- // cache_mutex_.r <= L < mutex_
-- // After: cache_mutex_.w <= L < mutex_
-- void ResetCache(RWLocker* cache_lock);
--
-- // Looks up and returns the State corresponding to a Workq.
-- // L >= mutex_
-- State* WorkqToCachedState(Workq* q, uint flag);
--
-- // Looks up and returns a State matching the inst, ninst, and flag.
-- // L >= mutex_
-- State* CachedState(int* inst, int ninst, uint flag);
--
-- // Clear the cache entirely.
-- // Must hold cache_mutex_.w or be in destructor.
-- void ClearCache();
--
-- // Converts a State into a Workq: the opposite of WorkqToCachedState.
-- // L >= mutex_
-- static void StateToWorkq(State* s, Workq* q);
--
-- // Runs a State on a given byte, returning the next state.
-- State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_
-- State* RunStateOnByte(State*, int); // L >= mutex_
--
-- // Runs a Workq on a given byte followed by a set of empty-string flags,
-- // producing a new Workq in nq. If a match instruction is encountered,
-- // sets *ismatch to true.
-- // L >= mutex_
-- void RunWorkqOnByte(Workq* q, Workq* nq,
-- int c, uint flag, bool* ismatch,
-- Prog::MatchKind kind,
-- int new_byte_loop);
--
-- // Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
-- // L >= mutex_
-- void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag);
--
-- // Adds the instruction id to the Workq, following empty arrows
-- // according to flag.
-- // L >= mutex_
-- void AddToQueue(Workq* q, int id, uint flag);
--
-- // For debugging, returns a text representation of State.
-- static string DumpState(State* state);
--
-- // For debugging, returns a text representation of a Workq.
-- static string DumpWorkq(Workq* q);
--
-- // Search parameters
-- struct SearchParams {
-- SearchParams(const StringPiece& text, const StringPiece& context,
-- RWLocker* cache_lock)
-- : text(text), context(context),
-- anchored(false),
-- want_earliest_match(false),
-- run_forward(false),
-- start(NULL),
-- firstbyte(kFbUnknown),
-- cache_lock(cache_lock),
-- failed(false),
-- ep(NULL),
-- matches(NULL) { }
--
-- StringPiece text;
-- StringPiece context;
-- bool anchored;
-- bool want_earliest_match;
-- bool run_forward;
-- State* start;
-- int firstbyte;
-- RWLocker *cache_lock;
-- bool failed; // "out" parameter: whether search gave up
-- const char* ep; // "out" parameter: end pointer for match
-- vector<int>* matches;
--
-- private:
-- DISALLOW_EVIL_CONSTRUCTORS(SearchParams);
-- };
--
-- // Before each search, the parameters to Search are analyzed by
-- // AnalyzeSearch to determine the state in which to start and the
-- // "firstbyte" for that state, if any.
-- struct StartInfo {
-- StartInfo() : start(NULL), firstbyte(kFbUnknown) { }
-- State* start;
-- volatile int firstbyte;
-- };
--
-- // Fills in params->start and params->firstbyte using
-- // the other search parameters. Returns true on success,
-- // false on failure.
-- // cache_mutex_.r <= L < mutex_
-- bool AnalyzeSearch(SearchParams* params);
-- bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint flags);
--
-- // The generic search loop, inlined to create specialized versions.
-- // cache_mutex_.r <= L < mutex_
-- // Might unlock and relock cache_mutex_ via params->cache_lock.
-- inline bool InlinedSearchLoop(SearchParams* params,
-- bool have_firstbyte,
-- bool want_earliest_match,
-- bool run_forward);
--
-- // The specialized versions of InlinedSearchLoop. The three letters
-- // at the ends of the name denote the true/false values used as the
-- // last three parameters of InlinedSearchLoop.
-- // cache_mutex_.r <= L < mutex_
-- // Might unlock and relock cache_mutex_ via params->cache_lock.
-- bool SearchFFF(SearchParams* params);
-- bool SearchFFT(SearchParams* params);
-- bool SearchFTF(SearchParams* params);
-- bool SearchFTT(SearchParams* params);
-- bool SearchTFF(SearchParams* params);
-- bool SearchTFT(SearchParams* params);
-- bool SearchTTF(SearchParams* params);
-- bool SearchTTT(SearchParams* params);
--
-- // The main search loop: calls an appropriate specialized version of
-- // InlinedSearchLoop.
-- // cache_mutex_.r <= L < mutex_
-- // Might unlock and relock cache_mutex_ via params->cache_lock.
-- bool FastSearchLoop(SearchParams* params);
--
-- // For debugging, a slow search loop that calls InlinedSearchLoop
-- // directly -- because the booleans passed are not constants, the
-- // loop is not specialized like the SearchFFF etc. versions, so it
-- // runs much more slowly. Useful only for debugging.
-- // cache_mutex_.r <= L < mutex_
-- // Might unlock and relock cache_mutex_ via params->cache_lock.
-- bool SlowSearchLoop(SearchParams* params);
--
-- // Looks up bytes in bytemap_ but handles case c == kByteEndText too.
-- int ByteMap(int c) {
-- if (c == kByteEndText)
-- return prog_->bytemap_range();
-- return prog_->bytemap()[c];
-- }
--
-- // Constant after initialization.
-- Prog* prog_; // The regular expression program to run.
-- Prog::MatchKind kind_; // The kind of DFA.
-- int start_unanchored_; // start of unanchored program
-- bool init_failed_; // initialization failed (out of memory)
--
-- Mutex mutex_; // mutex_ >= cache_mutex_.r
--
-- // Scratch areas, protected by mutex_.
-- Workq* q0_; // Two pre-allocated work queues.
-- Workq* q1_;
-- int* astack_; // Pre-allocated stack for AddToQueue
-- int nastack_;
--
-- // State* cache. Many threads use and add to the cache simultaneously,
-- // holding cache_mutex_ for reading and mutex_ (above) when adding.
-- // If the cache fills and needs to be discarded, the discarding is done
-- // while holding cache_mutex_ for writing, to avoid interrupting other
-- // readers. Any State* pointers are only valid while cache_mutex_
-- // is held.
-- Mutex cache_mutex_;
-- int64 mem_budget_; // Total memory budget for all States.
-- int64 state_budget_; // Amount of memory remaining for new States.
-- StateSet state_cache_; // All States computed so far.
-- StartInfo start_[kMaxStart];
-- bool cache_warned_; // have printed to LOG(INFO) about the cache
--};
--
--// Shorthand for casting to uint8*.
--static inline const uint8* BytePtr(const void* v) {
-- return reinterpret_cast<const uint8*>(v);
--}
--
--// Work queues
--
--// Marks separate thread groups of different priority
--// in the work queue when in leftmost-longest matching mode.
--#define Mark (-1)
--
--// Internally, the DFA uses a sparse array of
--// program instruction pointers as a work queue.
--// In leftmost longest mode, marks separate sections
--// of workq that started executing at different
--// locations in the string (earlier locations first).
--class DFA::Workq : public SparseSet {
-- public:
-- // Constructor: n is number of normal slots, maxmark number of mark slots.
-- Workq(int n, int maxmark) :
-- SparseSet(n+maxmark),
-- n_(n),
-- maxmark_(maxmark),
-- nextmark_(n),
-- last_was_mark_(true) {
-- }
--
-- bool is_mark(int i) { return i >= n_; }
--
-- int maxmark() { return maxmark_; }
--
-- void clear() {
-- SparseSet::clear();
-- nextmark_ = n_;
-- }
--
-- void mark() {
-- if (last_was_mark_)
-- return;
-- last_was_mark_ = false;
-- SparseSet::insert_new(nextmark_++);
-- }
--
-- int size() {
-- return n_ + maxmark_;
-- }
--
-- void insert(int id) {
-- if (contains(id))
-- return;
-- insert_new(id);
-- }
--
-- void insert_new(int id) {
-- last_was_mark_ = false;
-- SparseSet::insert_new(id);
-- }
--
-- private:
-- int n_; // size excluding marks
-- int maxmark_; // maximum number of marks
-- int nextmark_; // id of next mark
-- bool last_was_mark_; // last inserted was mark
-- DISALLOW_EVIL_CONSTRUCTORS(Workq);
--};
--
--DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
-- : prog_(prog),
-- kind_(kind),
-- init_failed_(false),
-- q0_(NULL),
-- q1_(NULL),
-- astack_(NULL),
-- mem_budget_(max_mem),
-- cache_warned_(false) {
-- if (DebugDFA)
-- fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
-- int nmark = 0;
-- start_unanchored_ = 0;
-- if (kind_ == Prog::kLongestMatch) {
-- nmark = prog->size();
-- start_unanchored_ = prog->start_unanchored();
-- }
-- nastack_ = 2 * prog->size() + nmark;
--
-- // Account for space needed for DFA, q0, q1, astack.
-- mem_budget_ -= sizeof(DFA);
-- mem_budget_ -= (prog_->size() + nmark) *
-- (sizeof(int)+sizeof(int)) * 2; // q0, q1
-- mem_budget_ -= nastack_ * sizeof(int); // astack
-- if (mem_budget_ < 0) {
-- LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
-- prog_->size(), max_mem);
-- init_failed_ = true;
-- return;
-- }
--
-- state_budget_ = mem_budget_;
--
-- // Make sure there is a reasonable amount of working room left.
-- // At minimum, the search requires room for two states in order
-- // to limp along, restarting frequently. We'll get better performance
-- // if there is room for a larger number of states, say 20.
-- int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
-- (prog_->bytemap_range()+1)*sizeof(State*);
-- if (state_budget_ < 20*one_state) {
-- LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
-- prog_->size(), max_mem);
-- init_failed_ = true;
-- return;
-- }
--
-- q0_ = new Workq(prog->size(), nmark);
-- q1_ = new Workq(prog->size(), nmark);
-- astack_ = new int[nastack_];
--}
--
--DFA::~DFA() {
-- delete q0_;
-- delete q1_;
-- delete[] astack_;
-- ClearCache();
--}
--
--// In the DFA state graph, s->next[c] == NULL means that the
--// state has not yet been computed and needs to be. We need
--// a different special value to signal that s->next[c] is a
--// state that can never lead to a match (and thus the search
--// can be called off). Hence DeadState.
--#define DeadState reinterpret_cast<State*>(1)
--
--// Signals that the rest of the string matches no matter what it is.
--#define FullMatchState reinterpret_cast<State*>(2)
--
--#define SpecialStateMax FullMatchState
--
--// Debugging printouts
--
--// For debugging, returns a string representation of the work queue.
--string DFA::DumpWorkq(Workq* q) {
-- string s;
-- const char* sep = "";
-- for (DFA::Workq::iterator it = q->begin(); it != q->end(); ++it) {
-- if (q->is_mark(*it)) {
-- StringAppendF(&s, "|");
-- sep = "";
-- } else {
-- StringAppendF(&s, "%s%d", sep, *it);
-- sep = ",";
-- }
-- }
-- return s;
--}
--
--// For debugging, returns a string representation of the state.
--string DFA::DumpState(State* state) {
-- if (state == NULL)
-- return "_";
-- if (state == DeadState)
-- return "X";
-- if (state == FullMatchState)
-- return "*";
-- string s;
-- const char* sep = "";
-- StringAppendF(&s, "(%p)", state);
-- for (int i = 0; i < state->ninst_; i++) {
-- if (state->inst_[i] == Mark) {
-- StringAppendF(&s, "|");
-- sep = "";
-- } else {
-- StringAppendF(&s, "%s%d", sep, state->inst_[i]);
-- sep = ",";
-- }
-- }
-- StringAppendF(&s, " flag=%#x", state->flag_);
-- return s;
--}
--
--//////////////////////////////////////////////////////////////////////
--//
--// DFA state graph construction.
--//
--// The DFA state graph is a heavily-linked collection of State* structures.
--// The state_cache_ is a set of all the State structures ever allocated,
--// so that if the same state is reached by two different paths,
--// the same State structure can be used. This reduces allocation
--// requirements and also avoids duplication of effort across the two
--// identical states.
--//
--// A State is defined by an ordered list of instruction ids and a flag word.
--//
--// The choice of an ordered list of instructions differs from a typical
--// textbook DFA implementation, which would use an unordered set.
--// Textbook descriptions, however, only care about whether
--// the DFA matches, not where it matches in the text. To decide where the
--// DFA matches, we need to mimic the behavior of the dominant backtracking
--// implementations like PCRE, which try one possible regular expression
--// execution, then another, then another, stopping when one of them succeeds.
--// The DFA execution tries these many executions in parallel, representing
--// each by an instruction id. These pointers are ordered in the State.inst_
--// list in the same order that the executions would happen in a backtracking
--// search: if a match is found during execution of inst_[2], inst_[i] for i>=3
--// can be discarded.
--//
--// Textbooks also typically do not consider context-aware empty string operators
--// like ^ or $. These are handled by the flag word, which specifies the set
--// of empty-string operators that should be matched when executing at the
--// current text position. These flag bits are defined in prog.h.
--// The flag word also contains two DFA-specific bits: kFlagMatch if the state
--// is a matching state (one that reached a kInstMatch in the program)
--// and kFlagLastWord if the last processed byte was a word character, for the
--// implementation of \B and \b.
--//
--// The flag word also contains, shifted up 16 bits, the bits looked for by
--// any kInstEmptyWidth instructions in the state. These provide a useful
--// summary indicating when new flags might be useful.
--//
--// The permanent representation of a State's instruction ids is just an array,
--// but while a state is being analyzed, these instruction ids are represented
--// as a Workq, which is an array that allows iteration in insertion order.
--
--// NOTE(rsc): The choice of State construction determines whether the DFA
--// mimics backtracking implementations (so-called leftmost first matching) or
--// traditional DFA implementations (so-called leftmost longest matching as
--// prescribed by POSIX). This implementation chooses to mimic the
--// backtracking implementations, because we want to replace PCRE. To get
--// POSIX behavior, the states would need to be considered not as a simple
--// ordered list of instruction ids, but as a list of unordered sets of instruction
--// ids. A match by a state in one set would inhibit the running of sets
--// farther down the list but not other instruction ids in the same set. Each
--// set would correspond to matches beginning at a given point in the string.
--// This is implemented by separating different sets with Mark pointers.
--
--// Looks in the State cache for a State matching q, flag.
--// If one is found, returns it. If one is not found, allocates one,
--// inserts it in the cache, and returns it.
--DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) {
-- if (DEBUG_MODE)
-- mutex_.AssertHeld();
--
-- // Construct array of instruction ids for the new state.
-- // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
-- // those are the only operators with any effect in
-- // RunWorkqOnEmptyString or RunWorkqOnByte.
-- int* inst = new int[q->size()];
-- int n = 0;
-- uint needflags = 0; // flags needed by kInstEmptyWidth instructions
-- bool sawmatch = false; // whether queue contains guaranteed kInstMatch
-- bool sawmark = false; // whether queue contains a Mark
-- if (DebugDFA)
-- fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
-- for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
-- int id = *it;
-- if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
-- break;
-- if (q->is_mark(id)) {
-- if (n > 0 && inst[n-1] != Mark) {
-- sawmark = true;
-- inst[n++] = Mark;
-- }
-- continue;
-- }
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- case kInstAltMatch:
-- // This state will continue to a match no matter what
-- // the rest of the input is. If it is the highest priority match
-- // being considered, return the special FullMatchState
-- // to indicate that it's all matches from here out.
-- if (kind_ != Prog::kManyMatch &&
-- (kind_ != Prog::kFirstMatch ||
-- (it == q->begin() && ip->greedy(prog_))) &&
-- (kind_ != Prog::kLongestMatch || !sawmark) &&
-- (flag & kFlagMatch)) {
-- delete[] inst;
-- if (DebugDFA)
-- fprintf(stderr, " -> FullMatchState\n");
-- return FullMatchState;
-- }
-- // Fall through.
-- case kInstByteRange: // These are useful.
-- case kInstEmptyWidth:
-- case kInstMatch:
-- case kInstAlt: // Not useful, but necessary [*]
-- inst[n++] = *it;
-- if (ip->opcode() == kInstEmptyWidth)
-- needflags |= ip->empty();
-- if (ip->opcode() == kInstMatch && !prog_->anchor_end())
-- sawmatch = true;
-- break;
--
-- default: // The rest are not.
-- break;
-- }
--
-- // [*] kInstAlt would seem useless to record in a state, since
-- // we've already followed both its arrows and saved all the
-- // interesting states we can reach from there. The problem
-- // is that one of the empty-width instructions might lead
-- // back to the same kInstAlt (if an empty-width operator is starred),
-- // producing a different evaluation order depending on whether
-- // we keep the kInstAlt to begin with. Sigh.
-- // A specific case that this affects is /(^|a)+/ matching "a".
-- // If we don't save the kInstAlt, we will match the whole "a" (0,1)
-- // but in fact the correct leftmost-first match is the leading "" (0,0).
-- }
-- DCHECK_LE(n, q->size());
-- if (n > 0 && inst[n-1] == Mark)
-- n--;
--
-- // If there are no empty-width instructions waiting to execute,
-- // then the extra flag bits will not be used, so there is no
-- // point in saving them. (Discarding them reduces the number
-- // of distinct states.)
-- if (needflags == 0)
-- flag &= kFlagMatch;
--
-- // NOTE(rsc): The code above cannot do flag &= needflags,
-- // because if the right flags were present to pass the current
-- // kInstEmptyWidth instructions, new kInstEmptyWidth instructions
-- // might be reached that in turn need different flags.
-- // The only sure thing is that if there are no kInstEmptyWidth
-- // instructions at all, no flags will be needed.
-- // We could do the extra work to figure out the full set of
-- // possibly needed flags by exploring past the kInstEmptyWidth
-- // instructions, but the check above -- are any flags needed
-- // at all? -- handles the most common case. More fine-grained
-- // analysis can only be justified by measurements showing that
-- // too many redundant states are being allocated.
--
-- // If there are no Insts in the list, it's a dead state,
-- // which is useful to signal with a special pointer so that
-- // the execution loop can stop early. This is only okay
-- // if the state is *not* a matching state.
-- if (n == 0 && flag == 0) {
-- delete[] inst;
-- if (DebugDFA)
-- fprintf(stderr, " -> DeadState\n");
-- return DeadState;
-- }
--
-- // If we're in longest match mode, the state is a sequence of
-- // unordered state sets separated by Marks. Sort each set
-- // to canonicalize, to reduce the number of distinct sets stored.
-- if (kind_ == Prog::kLongestMatch) {
-- int* ip = inst;
-- int* ep = ip + n;
-- while (ip < ep) {
-- int* markp = ip;
-- while (markp < ep && *markp != Mark)
-- markp++;
-- sort(ip, markp);
-- if (markp < ep)
-- markp++;
-- ip = markp;
-- }
-- }
--
-- // Save the needed empty-width flags in the top bits for use later.
-- flag |= needflags << kFlagNeedShift;
--
-- State* state = CachedState(inst, n, flag);
-- delete[] inst;
-- return state;
--}
--
--// Looks in the State cache for a State matching inst, ninst, flag.
--// If one is found, returns it. If one is not found, allocates one,
--// inserts it in the cache, and returns it.
--DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
-- if (DEBUG_MODE)
-- mutex_.AssertHeld();
--
-- // Look in the cache for a pre-existing state.
-- State state = { inst, ninst, flag, NULL };
-- StateSet::iterator it = state_cache_.find(&state);
-- if (it != state_cache_.end()) {
-- if (DebugDFA)
-- fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
-- return *it;
-- }
--
-- // Must have enough memory for new state.
-- // In addition to what we're going to allocate,
-- // the state cache hash table seems to incur about 32 bytes per
-- // State*, empirically.
-- const int kStateCacheOverhead = 32;
-- int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
-- int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int);
-- if (mem_budget_ < mem + kStateCacheOverhead) {
-- mem_budget_ = -1;
-- return NULL;
-- }
-- mem_budget_ -= mem + kStateCacheOverhead;
--
-- // Allocate new state, along with room for next and inst.
-- char* space = new char[mem];
-- State* s = reinterpret_cast<State*>(space);
-- s->next_ = reinterpret_cast<State**>(s + 1);
-- s->inst_ = reinterpret_cast<int*>(s->next_ + nnext);
-- memset(s->next_, 0, nnext*sizeof s->next_[0]);
-- memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
-- s->ninst_ = ninst;
-- s->flag_ = flag;
-- if (DebugDFA)
-- fprintf(stderr, " -> %s\n", DumpState(s).c_str());
--
-- // Put state in cache and return it.
-- state_cache_.insert(s);
-- return s;
--}
--
--// Clear the cache. Must hold cache_mutex_.w or be in destructor.
--void DFA::ClearCache() {
-- // In case state_cache_ doesn't support deleting entries
-- // during iteration, copy into a vector and then delete.
-- vector<State*> v;
-- v.reserve(state_cache_.size());
-- for (StateSet::iterator it = state_cache_.begin();
-- it != state_cache_.end(); ++it)
-- v.push_back(*it);
-- state_cache_.clear();
-- for (int i = 0; i < v.size(); i++)
-- delete[] reinterpret_cast<const char*>(v[i]);
--}
--
--// Copies insts in state s to the work queue q.
--void DFA::StateToWorkq(State* s, Workq* q) {
-- q->clear();
-- for (int i = 0; i < s->ninst_; i++) {
-- if (s->inst_[i] == Mark)
-- q->mark();
-- else
-- q->insert_new(s->inst_[i]);
-- }
--}
--
--// Adds ip to the work queue, following empty arrows according to flag
--// and expanding kInstAlt instructions (two-target gotos).
--void DFA::AddToQueue(Workq* q, int id, uint flag) {
--
-- // Use astack_ to hold our stack of states yet to process.
-- // It is sized to have room for nastack_ == 2*prog->size() + nmark
-- // instructions, which is enough: each instruction can be
-- // processed by the switch below only once, and the processing
-- // pushes at most two instructions plus maybe a mark.
-- // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.)
-- int* stk = astack_;
-- int nstk = 0;
--
-- stk[nstk++] = id;
-- while (nstk > 0) {
-- DCHECK_LE(nstk, nastack_);
-- id = stk[--nstk];
--
-- if (id == Mark) {
-- q->mark();
-- continue;
-- }
--
-- if (id == 0)
-- continue;
--
-- // If ip is already on the queue, nothing to do.
-- // Otherwise add it. We don't actually keep all the ones
-- // that get added -- for example, kInstAlt is ignored
-- // when on a work queue -- but adding all ip's here
-- // increases the likelihood of q->contains(id),
-- // reducing the amount of duplicated work.
-- if (q->contains(id))
-- continue;
-- q->insert_new(id);
--
-- // Process instruction.
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- case kInstFail: // can't happen: discarded above
-- break;
--
-- case kInstByteRange: // just save these on the queue
-- case kInstMatch:
-- break;
--
-- case kInstCapture: // DFA treats captures as no-ops.
-- case kInstNop:
-- stk[nstk++] = ip->out();
-- break;
--
-- case kInstAlt: // two choices: expand both, in order
-- case kInstAltMatch:
-- // Want to visit out then out1, so push on stack in reverse order.
-- // This instruction is the [00-FF]* loop at the beginning of
-- // a leftmost-longest unanchored search, separate out from out1
-- // with a Mark, so that out1's threads (which will start farther
-- // to the right in the string being searched) are lower priority
-- // than the current ones.
-- stk[nstk++] = ip->out1();
-- if (q->maxmark() > 0 &&
-- id == prog_->start_unanchored() && id != prog_->start())
-- stk[nstk++] = Mark;
-- stk[nstk++] = ip->out();
-- break;
--
-- case kInstEmptyWidth:
-- if ((ip->empty() & flag) == ip->empty())
-- stk[nstk++] = ip->out();
-- break;
-- }
-- }
--}
--
--// Running of work queues. In the work queue, order matters:
--// the queue is sorted in priority order. If instruction i comes before j,
--// then the instructions that i produces during the run must come before
--// the ones that j produces. In order to keep this invariant, all the
--// work queue runners have to take an old queue to process and then
--// also a new queue to fill in. It's not acceptable to add to the end of
--// an existing queue, because new instructions will not end up in the
--// correct position.
--
--// Runs the work queue, processing the empty strings indicated by flag.
--// For example, flag == kEmptyBeginLine|kEmptyEndLine means to match
--// both ^ and $. It is important that callers pass all flags at once:
--// processing both ^ and $ is not the same as first processing only ^
--// and then processing only $. Doing the two-step sequence won't match
--// ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior
--// exhibited by existing implementations).
--void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) {
-- newq->clear();
-- for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
-- if (oldq->is_mark(*i))
-- AddToQueue(newq, Mark, flag);
-- else
-- AddToQueue(newq, *i, flag);
-- }
--}
--
--// Runs the work queue, processing the single byte c followed by any empty
--// strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine,
--// means to match c$. Sets the bool *ismatch to true if the end of the
--// regular expression program has been reached (the regexp has matched).
--void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
-- int c, uint flag, bool* ismatch,
-- Prog::MatchKind kind,
-- int new_byte_loop) {
-- if (DEBUG_MODE)
-- mutex_.AssertHeld();
--
-- newq->clear();
-- for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
-- if (oldq->is_mark(*i)) {
-- if (*ismatch)
-- return;
-- newq->mark();
-- continue;
-- }
-- int id = *i;
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- case kInstFail: // never succeeds
-- case kInstCapture: // already followed
-- case kInstNop: // already followed
-- case kInstAlt: // already followed
-- case kInstAltMatch: // already followed
-- case kInstEmptyWidth: // already followed
-- break;
--
-- case kInstByteRange: // can follow if c is in range
-- if (ip->Matches(c))
-- AddToQueue(newq, ip->out(), flag);
-- break;
--
-- case kInstMatch:
-- if (prog_->anchor_end() && c != kByteEndText)
-- break;
-- *ismatch = true;
-- if (kind == Prog::kFirstMatch) {
-- // Can stop processing work queue since we found a match.
-- return;
-- }
-- break;
-- }
-- }
--
-- if (DebugDFA)
-- fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
-- c, flag, DumpWorkq(newq).c_str(), *ismatch);
--}
--
--// Processes input byte c in state, returning new state.
--// Caller does not hold mutex.
--DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
-- // Keep only one RunStateOnByte going
-- // even if the DFA is being run by multiple threads.
-- MutexLock l(&mutex_);
-- return RunStateOnByte(state, c);
--}
--
--// Processes input byte c in state, returning new state.
--DFA::State* DFA::RunStateOnByte(State* state, int c) {
-- if (DEBUG_MODE)
-- mutex_.AssertHeld();
-- if (state <= SpecialStateMax) {
-- if (state == FullMatchState) {
-- // It is convenient for routines like PossibleMatchRange
-- // if we implement RunStateOnByte for FullMatchState:
-- // once you get into this state you never get out,
-- // so it's pretty easy.
-- return FullMatchState;
-- }
-- if (state == DeadState) {
-- LOG(DFATAL) << "DeadState in RunStateOnByte";
-- return NULL;
-- }
-- if (state == NULL) {
-- LOG(DFATAL) << "NULL state in RunStateOnByte";
-- return NULL;
-- }
-- LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
-- return NULL;
-- }
--
-- // If someone else already computed this, return it.
-- MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
-- if (state->next_[ByteMap(c)])
-- return state->next_[ByteMap(c)];
--
-- // Convert state into Workq.
-- StateToWorkq(state, q0_);
--
-- // Flags marking the kinds of empty-width things (^ $ etc)
-- // around this byte. Before the byte we have the flags recorded
-- // in the State structure itself. After the byte we have
-- // nothing yet (but that will change: read on).
-- uint needflag = state->flag_ >> kFlagNeedShift;
-- uint beforeflag = state->flag_ & kFlagEmptyMask;
-- uint oldbeforeflag = beforeflag;
-- uint afterflag = 0;
--
-- if (c == '\n') {
-- // Insert implicit $ and ^ around \n
-- beforeflag |= kEmptyEndLine;
-- afterflag |= kEmptyBeginLine;
-- }
--
-- if (c == kByteEndText) {
-- // Insert implicit $ and \z before the fake "end text" byte.
-- beforeflag |= kEmptyEndLine | kEmptyEndText;
-- }
--
-- // The state flag kFlagLastWord says whether the last
-- // byte processed was a word character. Use that info to
-- // insert empty-width (non-)word boundaries.
-- bool islastword = state->flag_ & kFlagLastWord;
-- bool isword = (c != kByteEndText && Prog::IsWordChar(c));
-- if (isword == islastword)
-- beforeflag |= kEmptyNonWordBoundary;
-- else
-- beforeflag |= kEmptyWordBoundary;
--
-- // Okay, finally ready to run.
-- // Only useful to rerun on empty string if there are new, useful flags.
-- if (beforeflag & ~oldbeforeflag & needflag) {
-- RunWorkqOnEmptyString(q0_, q1_, beforeflag);
-- swap(q0_, q1_);
-- }
-- bool ismatch = false;
-- RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_);
-- swap(q0_, q1_);
--
-- // Save afterflag along with ismatch and isword in new state.
-- uint flag = afterflag;
-- if (ismatch)
-- flag |= kFlagMatch;
-- if (isword)
-- flag |= kFlagLastWord;
--
-- State* ns = WorkqToCachedState(q0_, flag);
--
-- // Write barrier before updating state->next_ so that the
-- // main search loop can proceed without any locking, for speed.
-- // (Otherwise it would need one mutex operation per input byte.)
-- // The annotations below tell race detectors that:
-- // a) the access to next_ should be ignored,
-- // b) 'ns' is properly published.
-- WriteMemoryBarrier(); // Flush ns before linking to it.
-- ANNOTATE_PUBLISH_MEMORY_RANGE(ns, sizeof(*ns));
--
-- ANNOTATE_IGNORE_WRITES_BEGIN();
-- state->next_[ByteMap(c)] = ns;
-- ANNOTATE_IGNORE_WRITES_END();
-- return ns;
--}
--
--
--//////////////////////////////////////////////////////////////////////
--// DFA cache reset.
--
--// Reader-writer lock helper.
--//
--// The DFA uses a reader-writer mutex to protect the state graph itself.
--// Traversing the state graph requires holding the mutex for reading,
--// and discarding the state graph and starting over requires holding the
--// lock for writing. If a search needs to expand the graph but is out
--// of memory, it will need to drop its read lock and then acquire the
--// write lock. Since it cannot then atomically downgrade from write lock
--// to read lock, it runs the rest of the search holding the write lock.
--// (This probably helps avoid repeated contention, but really the decision
--// is forced by the Mutex interface.) It's a bit complicated to keep
--// track of whether the lock is held for reading or writing and thread
--// that through the search, so instead we encapsulate it in the RWLocker
--// and pass that around.
--
--class DFA::RWLocker {
-- public:
-- explicit RWLocker(Mutex* mu);
-- ~RWLocker();
--
-- // If the lock is only held for reading right now,
-- // drop the read lock and re-acquire for writing.
-- // Subsequent calls to LockForWriting are no-ops.
-- // Notice that the lock is *released* temporarily.
-- void LockForWriting();
--
-- // Returns whether the lock is already held for writing.
-- bool IsLockedForWriting() {
-- return writing_;
-- }
--
-- private:
-- Mutex* mu_;
-- bool writing_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(RWLocker);
--};
--
--DFA::RWLocker::RWLocker(Mutex* mu)
-- : mu_(mu), writing_(false) {
--
-- mu_->ReaderLock();
--}
--
--// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
--// does not support lock upgrade.
--void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
-- if (!writing_) {
-- mu_->ReaderUnlock();
-- mu_->Lock();
-- writing_ = true;
-- }
--}
--
--DFA::RWLocker::~RWLocker() {
-- if (writing_)
-- mu_->WriterUnlock();
-- else
-- mu_->ReaderUnlock();
--}
--
--
--// When the DFA's State cache fills, we discard all the states in the
--// cache and start over. Many threads can be using and adding to the
--// cache at the same time, so we synchronize using the cache_mutex_
--// to keep from stepping on other threads. Specifically, all the
--// threads using the current cache hold cache_mutex_ for reading.
--// When a thread decides to flush the cache, it drops cache_mutex_
--// and then re-acquires it for writing. That ensures there are no
--// other threads accessing the cache anymore. The rest of the search
--// runs holding cache_mutex_ for writing, avoiding any contention
--// with or cache pollution caused by other threads.
--
--void DFA::ResetCache(RWLocker* cache_lock) {
-- // Re-acquire the cache_mutex_ for writing (exclusive use).
-- bool was_writing = cache_lock->IsLockedForWriting();
-- cache_lock->LockForWriting();
--
-- // If we already held cache_mutex_ for writing, it means
-- // this invocation of Search() has already reset the
-- // cache once already. That's a pretty clear indication
-- // that the cache is too small. Warn about that, once.
-- // TODO(rsc): Only warn if state_cache_.size() < some threshold.
-- if (was_writing && !cache_warned_) {
-- LOG(INFO) << "DFA memory cache could be too small: "
-- << "only room for " << state_cache_.size() << " states.";
-- cache_warned_ = true;
-- }
--
-- // Clear the cache, reset the memory budget.
-- for (int i = 0; i < kMaxStart; i++) {
-- start_[i].start = NULL;
-- start_[i].firstbyte = kFbUnknown;
-- }
-- ClearCache();
-- mem_budget_ = state_budget_;
--}
--
--// Typically, a couple States do need to be preserved across a cache
--// reset, like the State at the current point in the search.
--// The StateSaver class helps keep States across cache resets.
--// It makes a copy of the state's guts outside the cache (before the reset)
--// and then can be asked, after the reset, to recreate the State
--// in the new cache. For example, in a DFA method ("this" is a DFA):
--//
--// StateSaver saver(this, s);
--// ResetCache(cache_lock);
--// s = saver.Restore();
--//
--// The saver should always have room in the cache to re-create the state,
--// because resetting the cache locks out all other threads, and the cache
--// is known to have room for at least a couple states (otherwise the DFA
--// constructor fails).
--
--class DFA::StateSaver {
-- public:
-- explicit StateSaver(DFA* dfa, State* state);
-- ~StateSaver();
--
-- // Recreates and returns a state equivalent to the
-- // original state passed to the constructor.
-- // Returns NULL if the cache has filled, but
-- // since the DFA guarantees to have room in the cache
-- // for a couple states, should never return NULL
-- // if used right after ResetCache.
-- State* Restore();
--
-- private:
-- DFA* dfa_; // the DFA to use
-- int* inst_; // saved info from State
-- int ninst_;
-- uint flag_;
-- bool is_special_; // whether original state was special
-- State* special_; // if is_special_, the original state
--
-- DISALLOW_EVIL_CONSTRUCTORS(StateSaver);
--};
--
--DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
-- dfa_ = dfa;
-- if (state <= SpecialStateMax) {
-- inst_ = NULL;
-- ninst_ = 0;
-- flag_ = 0;
-- is_special_ = true;
-- special_ = state;
-- return;
-- }
-- is_special_ = false;
-- special_ = NULL;
-- flag_ = state->flag_;
-- ninst_ = state->ninst_;
-- inst_ = new int[ninst_];
-- memmove(inst_, state->inst_, ninst_*sizeof inst_[0]);
--}
--
--DFA::StateSaver::~StateSaver() {
-- if (!is_special_)
-- delete[] inst_;
--}
--
--DFA::State* DFA::StateSaver::Restore() {
-- if (is_special_)
-- return special_;
-- MutexLock l(&dfa_->mutex_);
-- State* s = dfa_->CachedState(inst_, ninst_, flag_);
-- if (s == NULL)
-- LOG(DFATAL) << "StateSaver failed to restore state.";
-- return s;
--}
--
--
--//////////////////////////////////////////////////////////////////////
--//
--// DFA execution.
--//
--// The basic search loop is easy: start in a state s and then for each
--// byte c in the input, s = s->next[c].
--//
--// This simple description omits a few efficiency-driven complications.
--//
--// First, the State graph is constructed incrementally: it is possible
--// that s->next[c] is null, indicating that that state has not been
--// fully explored. In this case, RunStateOnByte must be invoked to
--// determine the next state, which is cached in s->next[c] to save
--// future effort. An alternative reason for s->next[c] to be null is
--// that the DFA has reached a so-called "dead state", in which any match
--// is no longer possible. In this case RunStateOnByte will return NULL
--// and the processing of the string can stop early.
--//
--// Second, a 256-element pointer array for s->next_ makes each State
--// quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[]
--// maps from bytes to "byte classes" and then next_ only needs to have
--// as many pointers as there are byte classes. A byte class is simply a
--// range of bytes that the regexp never distinguishes between.
--// A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1,
--// 'a', 'b' to 'c', and 'c' to 0xFF. The bytemap slows us a little bit
--// but in exchange we typically cut the size of a State (and thus our
--// memory footprint) by about 5-10x. The comments still refer to
--// s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]].
--//
--// Third, it is common for a DFA for an unanchored match to begin in a
--// state in which only one particular byte value can take the DFA to a
--// different state. That is, s->next[c] != s for only one c. In this
--// situation, the DFA can do better than executing the simple loop.
--// Instead, it can call memchr to search very quickly for the byte c.
--// Whether the start state has this property is determined during a
--// pre-compilation pass, and if so, the byte b is passed to the search
--// loop as the "firstbyte" argument, along with a boolean "have_firstbyte".
--//
--// Fourth, the desired behavior is to search for the leftmost-best match
--// (approximately, the same one that Perl would find), which is not
--// necessarily the match ending earliest in the string. Each time a
--// match is found, it must be noted, but the DFA must continue on in
--// hope of finding a higher-priority match. In some cases, the caller only
--// cares whether there is any match at all, not which one is found.
--// The "want_earliest_match" flag causes the search to stop at the first
--// match found.
--//
--// Fifth, one algorithm that uses the DFA needs it to run over the
--// input string backward, beginning at the end and ending at the beginning.
--// Passing false for the "run_forward" flag causes the DFA to run backward.
--//
--// The checks for these last three cases, which in a naive implementation
--// would be performed once per input byte, slow the general loop enough
--// to merit specialized versions of the search loop for each of the
--// eight possible settings of the three booleans. Rather than write
--// eight different functions, we write one general implementation and then
--// inline it to create the specialized ones.
--//
--// Note that matches are delayed by one byte, to make it easier to
--// accomodate match conditions depending on the next input byte (like $ and \b).
--// When s->next[c]->IsMatch(), it means that there is a match ending just
--// *before* byte c.
--
--// The generic search loop. Searches text for a match, returning
--// the pointer to the end of the chosen match, or NULL if no match.
--// The bools are equal to the same-named variables in params, but
--// making them function arguments lets the inliner specialize
--// this function to each combination (see two paragraphs above).
--inline bool DFA::InlinedSearchLoop(SearchParams* params,
-- bool have_firstbyte,
-- bool want_earliest_match,
-- bool run_forward) {
-- State* start = params->start;
-- const uint8* bp = BytePtr(params->text.begin()); // start of text
-- const uint8* p = bp; // text scanning point
-- const uint8* ep = BytePtr(params->text.end()); // end of text
-- const uint8* resetp = NULL; // p at last cache reset
-- if (!run_forward)
-- swap(p, ep);
--
-- const uint8* bytemap = prog_->bytemap();
-- const uint8* lastmatch = NULL; // most recent matching position in text
-- bool matched = false;
-- State* s = start;
--
-- if (s->IsMatch()) {
-- matched = true;
-- lastmatch = p;
-- if (want_earliest_match) {
-- params->ep = reinterpret_cast<const char*>(lastmatch);
-- return true;
-- }
-- }
--
-- while (p != ep) {
-- if (DebugDFA)
-- fprintf(stderr, "@%d: %s\n", static_cast<int>(p - bp),
-- DumpState(s).c_str());
-- if (have_firstbyte && s == start) {
-- // In start state, only way out is to find firstbyte,
-- // so use optimized assembly in memchr to skip ahead.
-- // If firstbyte isn't found, we can skip to the end
-- // of the string.
-- if (run_forward) {
-- if ((p = BytePtr(memchr(p, params->firstbyte, ep - p))) == NULL) {
-- p = ep;
-- break;
-- }
-- } else {
-- if ((p = BytePtr(memrchr(ep, params->firstbyte, p - ep))) == NULL) {
-- p = ep;
-- break;
-- }
-- p++;
-- }
-- }
--
-- int c;
-- if (run_forward)
-- c = *p++;
-- else
-- c = *--p;
--
-- // Note that multiple threads might be consulting
-- // s->next_[bytemap[c]] simultaneously.
-- // RunStateOnByte takes care of the appropriate locking,
-- // including a memory barrier so that the unlocked access
-- // (sometimes known as "double-checked locking") is safe.
-- // The alternative would be either one DFA per thread
-- // or one mutex operation per input byte.
-- //
-- // ns == DeadState means the state is known to be dead
-- // (no more matches are possible).
-- // ns == NULL means the state has not yet been computed
-- // (need to call RunStateOnByteUnlocked).
-- // RunStateOnByte returns ns == NULL if it is out of memory.
-- // ns == FullMatchState means the rest of the string matches.
-- //
-- // Okay to use bytemap[] not ByteMap() here, because
-- // c is known to be an actual byte and not kByteEndText.
--
-- MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
-- State* ns = s->next_[bytemap[c]];
-- if (ns == NULL) {
-- ns = RunStateOnByteUnlocked(s, c);
-- if (ns == NULL) {
-- // After we reset the cache, we hold cache_mutex exclusively,
-- // so if resetp != NULL, it means we filled the DFA state
-- // cache with this search alone (without any other threads).
-- // Benchmarks show that doing a state computation on every
-- // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the
-- // same at about 2 MB/s. Unless we're processing an average
-- // of 10 bytes per state computation, fail so that RE2 can
-- // fall back to the NFA.
-- if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL &&
-- (p - resetp) < 10*state_cache_.size()) {
-- params->failed = true;
-- return false;
-- }
-- resetp = p;
--
-- // Prepare to save start and s across the reset.
-- StateSaver save_start(this, start);
-- StateSaver save_s(this, s);
--
-- // Discard all the States in the cache.
-- ResetCache(params->cache_lock);
--
-- // Restore start and s so we can continue.
-- if ((start = save_start.Restore()) == NULL ||
-- (s = save_s.Restore()) == NULL) {
-- // Restore already did LOG(DFATAL).
-- params->failed = true;
-- return false;
-- }
-- ns = RunStateOnByteUnlocked(s, c);
-- if (ns == NULL) {
-- LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
-- params->failed = true;
-- return false;
-- }
-- }
-- }
-- if (ns <= SpecialStateMax) {
-- if (ns == DeadState) {
-- params->ep = reinterpret_cast<const char*>(lastmatch);
-- return matched;
-- }
-- // FullMatchState
-- params->ep = reinterpret_cast<const char*>(ep);
-- return true;
-- }
-- s = ns;
--
-- if (s->IsMatch()) {
-- matched = true;
-- // The DFA notices the match one byte late,
-- // so adjust p before using it in the match.
-- if (run_forward)
-- lastmatch = p - 1;
-- else
-- lastmatch = p + 1;
-- if (DebugDFA)
-- fprintf(stderr, "match @%d! [%s]\n",
-- static_cast<int>(lastmatch - bp),
-- DumpState(s).c_str());
--
-- if (want_earliest_match) {
-- params->ep = reinterpret_cast<const char*>(lastmatch);
-- return true;
-- }
-- }
-- }
--
-- // Peek in state to see if a match is coming up.
-- if (params->matches && kind_ == Prog::kManyMatch) {
-- vector<int>* v = params->matches;
-- v->clear();
-- if (s > SpecialStateMax) {
-- for (int i = 0; i < s->ninst_; i++) {
-- Prog::Inst* ip = prog_->inst(s->inst_[i]);
-- if (ip->opcode() == kInstMatch)
-- v->push_back(ip->match_id());
-- }
-- }
-- }
--
--
-- // Process one more byte to see if it triggers a match.
-- // (Remember, matches are delayed one byte.)
-- int lastbyte;
-- if (run_forward) {
-- if (params->text.end() == params->context.end())
-- lastbyte = kByteEndText;
-- else
-- lastbyte = params->text.end()[0] & 0xFF;
-- } else {
-- if (params->text.begin() == params->context.begin())
-- lastbyte = kByteEndText;
-- else
-- lastbyte = params->text.begin()[-1] & 0xFF;
-- }
--
-- MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
-- State* ns = s->next_[ByteMap(lastbyte)];
-- if (ns == NULL) {
-- ns = RunStateOnByteUnlocked(s, lastbyte);
-- if (ns == NULL) {
-- StateSaver save_s(this, s);
-- ResetCache(params->cache_lock);
-- if ((s = save_s.Restore()) == NULL) {
-- params->failed = true;
-- return false;
-- }
-- ns = RunStateOnByteUnlocked(s, lastbyte);
-- if (ns == NULL) {
-- LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
-- params->failed = true;
-- return false;
-- }
-- }
-- }
-- s = ns;
-- if (DebugDFA)
-- fprintf(stderr, "@_: %s\n", DumpState(s).c_str());
-- if (s == FullMatchState) {
-- params->ep = reinterpret_cast<const char*>(ep);
-- return true;
-- }
-- if (s > SpecialStateMax && s->IsMatch()) {
-- matched = true;
-- lastmatch = p;
-- if (DebugDFA)
-- fprintf(stderr, "match @%d! [%s]\n", static_cast<int>(lastmatch - bp),
-- DumpState(s).c_str());
-- }
-- params->ep = reinterpret_cast<const char*>(lastmatch);
-- return matched;
--}
--
--// Inline specializations of the general loop.
--bool DFA::SearchFFF(SearchParams* params) {
-- return InlinedSearchLoop(params, 0, 0, 0);
--}
--bool DFA::SearchFFT(SearchParams* params) {
-- return InlinedSearchLoop(params, 0, 0, 1);
--}
--bool DFA::SearchFTF(SearchParams* params) {
-- return InlinedSearchLoop(params, 0, 1, 0);
--}
--bool DFA::SearchFTT(SearchParams* params) {
-- return InlinedSearchLoop(params, 0, 1, 1);
--}
--bool DFA::SearchTFF(SearchParams* params) {
-- return InlinedSearchLoop(params, 1, 0, 0);
--}
--bool DFA::SearchTFT(SearchParams* params) {
-- return InlinedSearchLoop(params, 1, 0, 1);
--}
--bool DFA::SearchTTF(SearchParams* params) {
-- return InlinedSearchLoop(params, 1, 1, 0);
--}
--bool DFA::SearchTTT(SearchParams* params) {
-- return InlinedSearchLoop(params, 1, 1, 1);
--}
--
--// For debugging, calls the general code directly.
--bool DFA::SlowSearchLoop(SearchParams* params) {
-- return InlinedSearchLoop(params,
-- params->firstbyte >= 0,
-- params->want_earliest_match,
-- params->run_forward);
--}
--
--// For performance, calls the appropriate specialized version
--// of InlinedSearchLoop.
--bool DFA::FastSearchLoop(SearchParams* params) {
-- // Because the methods are private, the Searches array
-- // cannot be declared at top level.
-- static bool (DFA::*Searches[])(SearchParams*) = {
-- &DFA::SearchFFF,
-- &DFA::SearchFFT,
-- &DFA::SearchFTF,
-- &DFA::SearchFTT,
-- &DFA::SearchTFF,
-- &DFA::SearchTFT,
-- &DFA::SearchTTF,
-- &DFA::SearchTTT,
-- };
--
-- bool have_firstbyte = (params->firstbyte >= 0);
-- int index = 4 * have_firstbyte +
-- 2 * params->want_earliest_match +
-- 1 * params->run_forward;
-- return (this->*Searches[index])(params);
--}
--
--
--// The discussion of DFA execution above ignored the question of how
--// to determine the initial state for the search loop. There are two
--// factors that influence the choice of start state.
--//
--// The first factor is whether the search is anchored or not.
--// The regexp program (Prog*) itself has
--// two different entry points: one for anchored searches and one for
--// unanchored searches. (The unanchored version starts with a leading ".*?"
--// and then jumps to the anchored one.)
--//
--// The second factor is where text appears in the larger context, which
--// determines which empty-string operators can be matched at the beginning
--// of execution. If text is at the very beginning of context, \A and ^ match.
--// Otherwise if text is at the beginning of a line, then ^ matches.
--// Otherwise it matters whether the character before text is a word character
--// or a non-word character.
--//
--// The two cases (unanchored vs not) and four cases (empty-string flags)
--// combine to make the eight cases recorded in the DFA's begin_text_[2],
--// begin_line_[2], after_wordchar_[2], and after_nonwordchar_[2] cached
--// StartInfos. The start state for each is filled in the first time it
--// is used for an actual search.
--
--// Examines text, context, and anchored to determine the right start
--// state for the DFA search loop. Fills in params and returns true on success.
--// Returns false on failure.
--bool DFA::AnalyzeSearch(SearchParams* params) {
-- const StringPiece& text = params->text;
-- const StringPiece& context = params->context;
--
-- // Sanity check: make sure that text lies within context.
-- if (text.begin() < context.begin() || text.end() > context.end()) {
-- LOG(DFATAL) << "Text is not inside context.";
-- params->start = DeadState;
-- return true;
-- }
--
-- // Determine correct search type.
-- int start;
-- uint flags;
-- if (params->run_forward) {
-- if (text.begin() == context.begin()) {
-- start = kStartBeginText;
-- flags = kEmptyBeginText|kEmptyBeginLine;
-- } else if (text.begin()[-1] == '\n') {
-- start = kStartBeginLine;
-- flags = kEmptyBeginLine;
-- } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
-- start = kStartAfterWordChar;
-- flags = kFlagLastWord;
-- } else {
-- start = kStartAfterNonWordChar;
-- flags = 0;
-- }
-- } else {
-- if (text.end() == context.end()) {
-- start = kStartBeginText;
-- flags = kEmptyBeginText|kEmptyBeginLine;
-- } else if (text.end()[0] == '\n') {
-- start = kStartBeginLine;
-- flags = kEmptyBeginLine;
-- } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
-- start = kStartAfterWordChar;
-- flags = kFlagLastWord;
-- } else {
-- start = kStartAfterNonWordChar;
-- flags = 0;
-- }
-- }
-- if (params->anchored || prog_->anchor_start())
-- start |= kStartAnchored;
-- StartInfo* info = &start_[start];
--
-- // Try once without cache_lock for writing.
-- // Try again after resetting the cache
-- // (ResetCache will relock cache_lock for writing).
-- if (!AnalyzeSearchHelper(params, info, flags)) {
-- ResetCache(params->cache_lock);
-- if (!AnalyzeSearchHelper(params, info, flags)) {
-- LOG(DFATAL) << "Failed to analyze start state.";
-- params->failed = true;
-- return false;
-- }
-- }
--
-- if (DebugDFA)
-- fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
-- params->anchored, params->run_forward, flags,
-- DumpState(info->start).c_str(), info->firstbyte);
--
-- params->start = info->start;
-- params->firstbyte = info->firstbyte;
--
-- return true;
--}
--
--// Fills in info if needed. Returns true on success, false on failure.
--bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
-- uint flags) {
-- // Quick check; okay because of memory barriers below.
-- if (info->firstbyte != kFbUnknown)
-- return true;
--
-- MutexLock l(&mutex_);
-- if (info->firstbyte != kFbUnknown)
-- return true;
--
-- q0_->clear();
-- AddToQueue(q0_,
-- params->anchored ? prog_->start() : prog_->start_unanchored(),
-- flags);
-- info->start = WorkqToCachedState(q0_, flags);
-- if (info->start == NULL)
-- return false;
--
-- if (info->start == DeadState) {
-- WriteMemoryBarrier(); // Synchronize with "quick check" above.
-- info->firstbyte = kFbNone;
-- return true;
-- }
--
-- if (info->start == FullMatchState) {
-- WriteMemoryBarrier(); // Synchronize with "quick check" above.
-- info->firstbyte = kFbNone; // will be ignored
-- return true;
-- }
--
-- // Compute info->firstbyte by running state on all
-- // possible byte values, looking for a single one that
-- // leads to a different state.
-- int firstbyte = kFbNone;
-- for (int i = 0; i < 256; i++) {
-- State* s = RunStateOnByte(info->start, i);
-- if (s == NULL) {
-- WriteMemoryBarrier(); // Synchronize with "quick check" above.
-- info->firstbyte = firstbyte;
-- return false;
-- }
-- if (s == info->start)
-- continue;
-- // Goes to new state...
-- if (firstbyte == kFbNone) {
-- firstbyte = i; // ... first one
-- } else {
-- firstbyte = kFbMany; // ... too many
-- break;
-- }
-- }
-- WriteMemoryBarrier(); // Synchronize with "quick check" above.
-- info->firstbyte = firstbyte;
-- return true;
--}
--
--// The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
--bool DFA::Search(const StringPiece& text,
-- const StringPiece& context,
-- bool anchored,
-- bool want_earliest_match,
-- bool run_forward,
-- bool* failed,
-- const char** epp,
-- vector<int>* matches) {
-- *epp = NULL;
-- if (!ok()) {
-- *failed = true;
-- return false;
-- }
-- *failed = false;
--
-- if (DebugDFA) {
-- fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
-- fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
-- text.as_string().c_str(), anchored, want_earliest_match,
-- run_forward, kind_);
-- }
--
-- RWLocker l(&cache_mutex_);
-- SearchParams params(text, context, &l);
-- params.anchored = anchored;
-- params.want_earliest_match = want_earliest_match;
-- params.run_forward = run_forward;
-- params.matches = matches;
--
-- if (!AnalyzeSearch(&params)) {
-- *failed = true;
-- return false;
-- }
-- if (params.start == DeadState)
-- return NULL;
-- if (params.start == FullMatchState) {
-- if (run_forward == want_earliest_match)
-- *epp = text.begin();
-- else
-- *epp = text.end();
-- return true;
-- }
-- if (DebugDFA)
-- fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
-- bool ret = FastSearchLoop(&params);
-- if (params.failed) {
-- *failed = true;
-- return false;
-- }
-- *epp = params.ep;
-- return ret;
--}
--
--// Deletes dfa.
--//
--// This is a separate function so that
--// prog.h can be used without moving the definition of
--// class DFA out of this file. If you set
--// prog->dfa_ = dfa;
--// then you also have to set
--// prog->delete_dfa_ = DeleteDFA;
--// so that ~Prog can delete the dfa.
--static void DeleteDFA(DFA* dfa) {
-- delete dfa;
--}
--
--DFA* Prog::GetDFA(MatchKind kind) {
-- DFA*volatile* pdfa;
-- if (kind == kFirstMatch || kind == kManyMatch) {
-- pdfa = &dfa_first_;
-- } else {
-- kind = kLongestMatch;
-- pdfa = &dfa_longest_;
-- }
--
-- // Quick check; okay because of memory barrier below.
-- DFA *dfa = *pdfa;
-- if (dfa != NULL) {
-- ANNOTATE_HAPPENS_AFTER(dfa);
-- return dfa;
-- }
--
-- MutexLock l(&dfa_mutex_);
-- dfa = *pdfa;
-- if (dfa != NULL)
-- return dfa;
--
-- // For a forward DFA, half the memory goes to each DFA.
-- // For a reverse DFA, all the memory goes to the
-- // "longest match" DFA, because RE2 never does reverse
-- // "first match" searches.
-- int64 m = dfa_mem_/2;
-- if (reversed_) {
-- if (kind == kLongestMatch || kind == kManyMatch)
-- m = dfa_mem_;
-- else
-- m = 0;
-- }
-- dfa = new DFA(this, kind, m);
-- delete_dfa_ = DeleteDFA;
--
-- // Synchronize with "quick check" above.
-- ANNOTATE_HAPPENS_BEFORE(dfa);
-- WriteMemoryBarrier();
-- *pdfa = dfa;
--
-- return dfa;
--}
--
--
--// Executes the regexp program to search in text,
--// which itself is inside the larger context. (As a convenience,
--// passing a NULL context is equivalent to passing text.)
--// Returns true if a match is found, false if not.
--// If a match is found, fills in match0->end() to point at the end of the match
--// and sets match0->begin() to text.begin(), since the DFA can't track
--// where the match actually began.
--//
--// This is the only external interface (class DFA only exists in this file).
--//
--bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match0, bool* failed, vector<int>* matches) {
-- *failed = false;
--
-- StringPiece context = const_context;
-- if (context.begin() == NULL)
-- context = text;
-- bool carat = anchor_start();
-- bool dollar = anchor_end();
-- if (reversed_) {
-- bool t = carat;
-- carat = dollar;
-- dollar = t;
-- }
-- if (carat && context.begin() != text.begin())
-- return false;
-- if (dollar && context.end() != text.end())
-- return false;
--
-- // Handle full match by running an anchored longest match
-- // and then checking if it covers all of text.
-- bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch;
-- bool endmatch = false;
-- if (kind == kManyMatch) {
-- endmatch = true;
-- } else if (kind == kFullMatch || anchor_end()) {
-- endmatch = true;
-- kind = kLongestMatch;
-- }
--
-- // If the caller doesn't care where the match is (just whether one exists),
-- // then we can stop at the very first match we find, the so-called
-- // "shortest match".
-- bool want_shortest_match = false;
-- if (match0 == NULL && !endmatch) {
-- want_shortest_match = true;
-- kind = kLongestMatch;
-- }
--
-- DFA* dfa = GetDFA(kind);
-- const char* ep;
-- bool matched = dfa->Search(text, context, anchored,
-- want_shortest_match, !reversed_,
-- failed, &ep, matches);
-- if (*failed)
-- return false;
-- if (!matched)
-- return false;
-- if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
-- return false;
--
-- // If caller cares, record the boundary of the match.
-- // We only know where it ends, so use the boundary of text
-- // as the beginning.
-- if (match0) {
-- if (reversed_)
-- *match0 = StringPiece(ep, text.end() - ep);
-- else
-- *match0 = StringPiece(text.begin(), ep - text.begin());
-- }
-- return true;
--}
--
--// Build out all states in DFA. Returns number of states.
--int DFA::BuildAllStates() {
-- if (!ok())
-- return 0;
--
-- // Pick out start state for unanchored search
-- // at beginning of text.
-- RWLocker l(&cache_mutex_);
-- SearchParams params(NULL, NULL, &l);
-- params.anchored = false;
-- if (!AnalyzeSearch(&params) || params.start <= SpecialStateMax)
-- return 0;
--
-- // Add start state to work queue.
-- StateSet queued;
-- vector<State*> q;
-- queued.insert(params.start);
-- q.push_back(params.start);
--
-- // Flood to expand every state.
-- for (int i = 0; i < q.size(); i++) {
-- State* s = q[i];
-- for (int c = 0; c < 257; c++) {
-- State* ns = RunStateOnByteUnlocked(s, c);
-- if (ns > SpecialStateMax && queued.find(ns) == queued.end()) {
-- queued.insert(ns);
-- q.push_back(ns);
-- }
-- }
-- }
--
-- return q.size();
--}
--
--// Build out all states in DFA for kind. Returns number of states.
--int Prog::BuildEntireDFA(MatchKind kind) {
-- //LOG(ERROR) << "BuildEntireDFA is only for testing.";
-- return GetDFA(kind)->BuildAllStates();
--}
--
--// Computes min and max for matching string.
--// Won't return strings bigger than maxlen.
--bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
-- if (!ok())
-- return false;
--
-- // NOTE: if future users of PossibleMatchRange want more precision when
-- // presented with infinitely repeated elements, consider making this a
-- // parameter to PossibleMatchRange.
-- static int kMaxEltRepetitions = 0;
--
-- // Keep track of the number of times we've visited states previously. We only
-- // revisit a given state if it's part of a repeated group, so if the value
-- // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set
-- // |*max| to |PrefixSuccessor(*max)|.
-- //
-- // Also note that previously_visited_states[UnseenStatePtr] will, in the STL
-- // tradition, implicitly insert a '0' value at first use. We take advantage
-- // of that property below.
-- map<State*, int> previously_visited_states;
--
-- // Pick out start state for anchored search at beginning of text.
-- RWLocker l(&cache_mutex_);
-- SearchParams params(NULL, NULL, &l);
-- params.anchored = true;
-- if (!AnalyzeSearch(&params))
-- return false;
-- if (params.start == DeadState) { // No matching strings
-- *min = "";
-- *max = "";
-- return true;
-- }
-- if (params.start == FullMatchState) // Every string matches: no max
-- return false;
--
-- // The DFA is essentially a big graph rooted at params.start,
-- // and paths in the graph correspond to accepted strings.
-- // Each node in the graph has potentially 256+1 arrows
-- // coming out, one for each byte plus the magic end of
-- // text character kByteEndText.
--
-- // To find the smallest possible prefix of an accepted
-- // string, we just walk the graph preferring to follow
-- // arrows with the lowest bytes possible. To find the
-- // largest possible prefix, we follow the largest bytes
-- // possible.
--
-- // The test for whether there is an arrow from s on byte j is
-- // ns = RunStateOnByteUnlocked(s, j);
-- // if (ns == NULL)
-- // return false;
-- // if (ns != DeadState && ns->ninst > 0)
-- // The RunStateOnByteUnlocked call asks the DFA to build out the graph.
-- // It returns NULL only if the DFA has run out of memory,
-- // in which case we can't be sure of anything.
-- // The second check sees whether there was graph built
-- // and whether it is interesting graph. Nodes might have
-- // ns->ninst == 0 if they exist only to represent the fact
-- // that a match was found on the previous byte.
--
-- // Build minimum prefix.
-- State* s = params.start;
-- min->clear();
-- for (int i = 0; i < maxlen; i++) {
-- if (previously_visited_states[s] > kMaxEltRepetitions) {
-- VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
-- << " for state s=" << s << " and min=" << CEscape(*min);
-- break;
-- }
-- previously_visited_states[s]++;
--
-- // Stop if min is a match.
-- State* ns = RunStateOnByteUnlocked(s, kByteEndText);
-- if (ns == NULL) // DFA out of memory
-- return false;
-- if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
-- break;
--
-- // Try to extend the string with low bytes.
-- bool extended = false;
-- for (int j = 0; j < 256; j++) {
-- ns = RunStateOnByteUnlocked(s, j);
-- if (ns == NULL) // DFA out of memory
-- return false;
-- if (ns == FullMatchState ||
-- (ns > SpecialStateMax && ns->ninst_ > 0)) {
-- extended = true;
-- min->append(1, j);
-- s = ns;
-- break;
-- }
-- }
-- if (!extended)
-- break;
-- }
--
-- // Build maximum prefix.
-- previously_visited_states.clear();
-- s = params.start;
-- max->clear();
-- for (int i = 0; i < maxlen; i++) {
-- if (previously_visited_states[s] > kMaxEltRepetitions) {
-- VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
-- << " for state s=" << s << " and max=" << CEscape(*max);
-- break;
-- }
-- previously_visited_states[s] += 1;
--
-- // Try to extend the string with high bytes.
-- bool extended = false;
-- for (int j = 255; j >= 0; j--) {
-- State* ns = RunStateOnByteUnlocked(s, j);
-- if (ns == NULL)
-- return false;
-- if (ns == FullMatchState ||
-- (ns > SpecialStateMax && ns->ninst_ > 0)) {
-- extended = true;
-- max->append(1, j);
-- s = ns;
-- break;
-- }
-- }
-- if (!extended) {
-- // Done, no need for PrefixSuccessor.
-- return true;
-- }
-- }
--
-- // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b
-- *max = PrefixSuccessor(*max);
--
-- // If there are no bytes left, we have no way to say "there is no maximum
-- // string". We could make the interface more complicated and be able to
-- // return "there is no maximum but here is a minimum", but that seems like
-- // overkill -- the most common no-max case is all possible strings, so not
-- // telling the caller that the empty string is the minimum match isn't a
-- // great loss.
-- if (max->empty())
-- return false;
--
-- return true;
--}
--
--// PossibleMatchRange for a Prog.
--bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) {
-- DFA* dfa = NULL;
-- {
-- MutexLock l(&dfa_mutex_);
-- // Have to use dfa_longest_ to get all strings for full matches.
-- // For example, (a|aa) never matches aa in first-match mode.
-- if (dfa_longest_ == NULL) {
-- dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
-- delete_dfa_ = DeleteDFA;
-- }
-- dfa = dfa_longest_;
-- }
-- return dfa->PossibleMatchRange(min, max, maxlen);
--}
--
--} // namespace re2
-diff --git a/re2/re2/filtered_re2.cc b/re2/re2/filtered_re2.cc
-deleted file mode 100644
-index 9269cee..0000000
---- a/re2/re2/filtered_re2.cc
-+++ /dev/null
-@@ -1,100 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <string>
--#include "util/util.h"
--#include "re2/filtered_re2.h"
--#include "re2/prefilter.h"
--#include "re2/prefilter_tree.h"
--
--namespace re2 {
--
--FilteredRE2::FilteredRE2()
-- : compiled_(false),
-- prefilter_tree_(new PrefilterTree()) {
--}
--
--FilteredRE2::~FilteredRE2() {
-- for (int i = 0; i < re2_vec_.size(); i++)
-- delete re2_vec_[i];
-- delete prefilter_tree_;
--}
--
--RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
-- const RE2::Options& options, int* id) {
-- RE2* re = new RE2(pattern, options);
-- RE2::ErrorCode code = re->error_code();
--
-- if (!re->ok()) {
-- LOG(ERROR) << "Couldn't compile regular expression, skipping: "
-- << re << " due to error " << re->error();
-- delete re;
-- } else {
-- *id = re2_vec_.size();
-- re2_vec_.push_back(re);
-- }
--
-- return code;
--}
--
--void FilteredRE2::Compile(vector<string>* atoms) {
-- if (compiled_ || re2_vec_.size() == 0) {
-- LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
-- return;
-- }
--
-- for (int i = 0; i < re2_vec_.size(); i++) {
-- Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]);
-- prefilter_tree_->Add(prefilter);
-- }
-- atoms->clear();
-- prefilter_tree_->Compile(atoms);
-- compiled_ = true;
--}
--
--int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
-- for (int i = 0; i < re2_vec_.size(); i++)
-- if (RE2::PartialMatch(text, *re2_vec_[i]))
-- return i;
-- return -1;
--}
--
--int FilteredRE2::FirstMatch(const StringPiece& text,
-- const vector<int>& atoms) const {
-- if (!compiled_) {
-- LOG(DFATAL) << "FirstMatch called before Compile";
-- return -1;
-- }
-- vector<int> regexps;
-- prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
-- for (int i = 0; i < regexps.size(); i++)
-- if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
-- return regexps[i];
-- return -1;
--}
--
--bool FilteredRE2::AllMatches(
-- const StringPiece& text,
-- const vector<int>& atoms,
-- vector<int>* matching_regexps) const {
-- matching_regexps->clear();
-- vector<int> regexps;
-- prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
-- for (int i = 0; i < regexps.size(); i++)
-- if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
-- matching_regexps->push_back(regexps[i]);
-- return !matching_regexps->empty();
--}
--
--void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,
-- vector<int>* passed_regexps) {
-- prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
--}
--
--
--void FilteredRE2::PrintPrefilter(int regexpid) {
-- prefilter_tree_->PrintPrefilter(regexpid);
--}
--
--} // namespace re2
-diff --git a/re2/re2/filtered_re2.h b/re2/re2/filtered_re2.h
-deleted file mode 100644
-index 64b35be..0000000
---- a/re2/re2/filtered_re2.h
-+++ /dev/null
-@@ -1,101 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
--// It provides a prefilter mechanism that helps in cutting down the
--// number of regexps that need to be actually searched.
--//
--// By design, it does not include a string matching engine. This is to
--// allow the user of the class to use their favorite string match
--// engine. The overall flow is: Add all the regexps using Add, then
--// Compile the FilteredRE2. The compile returns strings that need to
--// be matched. Note that all returned strings are lowercase. For
--// applying regexps to a search text, the caller does the string
--// matching using the strings returned. When doing the string match,
--// note that the caller has to do that on lower cased version of the
--// search text. Then call FirstMatch or AllMatches with a vector of
--// indices of strings that were found in the text to get the actual
--// regexp matches.
--
--#ifndef RE2_FILTERED_RE2_H_
--#define RE2_FILTERED_RE2_H_
--
--#include <vector>
--#include "re2/re2.h"
--
--namespace re2 {
--using std::vector;
--
--class PrefilterTree;
--
--class FilteredRE2 {
-- public:
-- FilteredRE2();
-- ~FilteredRE2();
--
-- // Uses RE2 constructor to create a RE2 object (re). Returns
-- // re->error_code(). If error_code is other than NoError, then re is
-- // deleted and not added to re2_vec_.
-- RE2::ErrorCode Add(const StringPiece& pattern,
-- const RE2::Options& options,
-- int *id);
--
-- // Prepares the regexps added by Add for filtering. Returns a set
-- // of strings that the caller should check for in candidate texts.
-- // The returned strings are lowercased. When doing string matching,
-- // the search text should be lowercased first to find matching
-- // strings from the set of strings returned by Compile. Call after
-- // all Add calls are done.
-- void Compile(vector<string>* strings_to_match);
--
-- // Returns the index of the first matching regexp.
-- // Returns -1 on no match. Can be called prior to Compile.
-- // Does not do any filtering: simply tries to Match the
-- // regexps in a loop.
-- int SlowFirstMatch(const StringPiece& text) const;
--
-- // Returns the index of the first matching regexp.
-- // Returns -1 on no match. Compile has to be called before
-- // calling this.
-- int FirstMatch(const StringPiece& text,
-- const vector<int>& atoms) const;
--
-- // Returns the indices of all matching regexps, after first clearing
-- // matched_regexps.
-- bool AllMatches(const StringPiece& text,
-- const vector<int>& atoms,
-- vector<int>* matching_regexps) const;
--
-- // The number of regexps added.
-- int NumRegexps() const { return re2_vec_.size(); }
--
-- private:
--
-- // Get the individual RE2 objects. Useful for testing.
-- RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; }
--
-- // Print prefilter.
-- void PrintPrefilter(int regexpid);
--
-- // Useful for testing and debugging.
-- void RegexpsGivenStrings(const vector<int>& matched_atoms,
-- vector<int>* passed_regexps);
--
-- // All the regexps in the FilteredRE2.
-- vector<RE2*> re2_vec_;
--
-- // Has the FilteredRE2 been compiled using Compile()
-- bool compiled_;
--
-- // An AND-OR tree of string atoms used for filtering regexps.
-- PrefilterTree* prefilter_tree_;
--
-- //DISALLOW_EVIL_CONSTRUCTORS(FilteredRE2);
-- FilteredRE2(const FilteredRE2&);
-- void operator=(const FilteredRE2&);
--};
--
--} // namespace re2
--
--#endif // RE2_FILTERED_RE2_H_
-diff --git a/re2/re2/make_perl_groups.pl b/re2/re2/make_perl_groups.pl
-deleted file mode 100755
-index d5eaa59..0000000
---- a/re2/re2/make_perl_groups.pl
-+++ /dev/null
-@@ -1,110 +0,0 @@
--#!/usr/bin/perl
--# Copyright 2008 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--# Generate table entries giving character ranges
--# for POSIX/Perl character classes. Rather than
--# figure out what the definition is, it is easier to ask
--# Perl about each letter from 0-128 and write down
--# its answer.
--
--@posixclasses = (
-- "[:alnum:]",
-- "[:alpha:]",
-- "[:ascii:]",
-- "[:blank:]",
-- "[:cntrl:]",
-- "[:digit:]",
-- "[:graph:]",
-- "[:lower:]",
-- "[:print:]",
-- "[:punct:]",
-- "[:space:]",
-- "[:upper:]",
-- "[:word:]",
-- "[:xdigit:]",
--);
--
--@perlclasses = (
-- "\\d",
-- "\\s",
-- "\\w",
--);
--
--sub ComputeClass($) {
-- my @ranges;
-- my ($class) = @_;
-- my $regexp = "[$class]";
-- my $start = -1;
-- for (my $i=0; $i<=129; $i++) {
-- if ($i == 129) { $i = 256; }
-- if ($i <= 128 && chr($i) =~ $regexp) {
-- if ($start < 0) {
-- $start = $i;
-- }
-- } else {
-- if ($start >= 0) {
-- push @ranges, [$start, $i-1];
-- }
-- $start = -1;
-- }
-- }
-- return @ranges;
--}
--
--sub PrintClass($$@) {
-- my ($cname, $name, @ranges) = @_;
-- print "static URange16 code${cname}[] = { /* $name */\n";
-- for (my $i=0; $i<@ranges; $i++) {
-- my @a = @{$ranges[$i]};
-- printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1];
-- }
-- print "};\n";
-- my $n = @ranges;
-- my $escname = $name;
-- $escname =~ s/\\/\\\\/g;
-- $negname = $escname;
-- if ($negname =~ /:/) {
-- $negname =~ s/:/:^/;
-- } else {
-- $negname =~ y/a-z/A-Z/;
-- }
-- return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname, $n }";
--}
--
--my $gen = 0;
--
--sub PrintClasses($@) {
-- my ($cname, @classes) = @_;
-- my @entries;
-- foreach my $cl (@classes) {
-- my @ranges = ComputeClass($cl);
-- push @entries, PrintClass(++$gen, $cl, @ranges);
-- }
-- print "UGroup ${cname}_groups[] = {\n";
-- foreach my $e (@entries) {
-- print "\t$e,\n";
-- }
-- print "};\n";
-- my $count = @entries;
-- print "int num_${cname}_groups = $count;\n";
--}
--
--print <<EOF;
--// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
--// make_perl_groups.pl >perl_groups.cc
--
--#include "re2/unicode_groups.h"
--
--namespace re2 {
--
--EOF
--
--PrintClasses("perl", @perlclasses);
--PrintClasses("posix", @posixclasses);
--
--print <<EOF;
--
--} // namespace re2
--EOF
-diff --git a/re2/re2/make_unicode_casefold.py b/re2/re2/make_unicode_casefold.py
-deleted file mode 100755
-index 3375d2e..0000000
---- a/re2/re2/make_unicode_casefold.py
-+++ /dev/null
-@@ -1,146 +0,0 @@
--#!/usr/bin/python
--# coding=utf-8
--#
--# Copyright 2008 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--# See unicode_casefold.h for description of case folding tables.
--
--"""Generate C++ table for Unicode case folding."""
--
--import unicode, sys
--
--_header = """
--// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
--// make_unicode_casefold.py >unicode_casefold.cc
--
--#include "re2/unicode_casefold.h"
--
--namespace re2 {
--
--"""
--
--_trailer = """
--
--} // namespace re2
--
--"""
--
--def _Delta(a, b):
-- """Compute the delta for b - a. Even/odd and odd/even
-- are handled specially, as described above."""
-- if a+1 == b:
-- if a%2 == 0:
-- return 'EvenOdd'
-- else:
-- return 'OddEven'
-- if a == b+1:
-- if a%2 == 0:
-- return 'OddEven'
-- else:
-- return 'EvenOdd'
-- return b - a
--
--def _AddDelta(a, delta):
-- """Return a + delta, handling EvenOdd and OddEven specially."""
-- if type(delta) == int:
-- return a+delta
-- if delta == 'EvenOdd':
-- if a%2 == 0:
-- return a+1
-- else:
-- return a-1
-- if delta == 'OddEven':
-- if a%2 == 1:
-- return a+1
-- else:
-- return a-1
-- print >>sys.stderr, "Bad Delta: ", delta
-- raise "Bad Delta"
--
--def _MakeRanges(pairs):
-- """Turn a list like [(65,97), (66, 98), ..., (90,122)]
-- into [(65, 90, +32)]."""
-- ranges = []
-- last = -100
--
-- def evenodd(last, a, b, r):
-- if a != last+1 or b != _AddDelta(a, r[2]):
-- return False
-- r[1] = a
-- return True
--
-- def evenoddpair(last, a, b, r):
-- if a != last+2:
-- return False
-- delta = r[2]
-- d = delta
-- if type(delta) is not str:
-- return False
-- if delta.endswith('Skip'):
-- d = delta[:-4]
-- else:
-- delta = d + 'Skip'
-- if b != _AddDelta(a, d):
-- return False
-- r[1] = a
-- r[2] = delta
-- return True
--
-- for a, b in pairs:
-- if ranges and evenodd(last, a, b, ranges[-1]):
-- pass
-- elif ranges and evenoddpair(last, a, b, ranges[-1]):
-- pass
-- else:
-- ranges.append([a, a, _Delta(a, b)])
-- last = a
-- return ranges
--
--# The maximum size of a case-folding group.
--# Case folding is implemented in parse.cc by a recursive process
--# with a recursion depth equal to the size of the largest
--# case-folding group, so it is important that this bound be small.
--# The current tables have no group bigger than 4.
--# If there are ever groups bigger than 10 or so, it will be
--# time to rework the code in parse.cc.
--MaxCasefoldGroup = 4
--
--def main():
-- lowergroups, casegroups = unicode.CaseGroups()
-- foldpairs = []
-- seen = {}
-- for c in casegroups:
-- if len(c) > MaxCasefoldGroup:
-- raise unicode.Error("casefold group too long: %s" % (c,))
-- for i in range(len(c)):
-- if c[i-1] in seen:
-- raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
-- seen[c[i-1]] = True
-- foldpairs.append([c[i-1], c[i]])
--
-- lowerpairs = []
-- for lower, group in lowergroups.iteritems():
-- for g in group:
-- if g != lower:
-- lowerpairs.append([g, lower])
--
-- def printpairs(name, foldpairs):
-- foldpairs.sort()
-- foldranges = _MakeRanges(foldpairs)
-- print "// %d groups, %d pairs, %d ranges" % (len(casegroups), len(foldpairs), len(foldranges))
-- print "CaseFold unicode_%s[] = {" % (name,)
-- for lo, hi, delta in foldranges:
-- print "\t{ %d, %d, %s }," % (lo, hi, delta)
-- print "};"
-- print "int num_unicode_%s = %d;" % (name, len(foldranges),)
-- print ""
--
-- print _header
-- printpairs("casefold", foldpairs)
-- printpairs("tolower", lowerpairs)
-- print _trailer
--
--if __name__ == '__main__':
-- main()
-diff --git a/re2/re2/make_unicode_groups.py b/re2/re2/make_unicode_groups.py
-deleted file mode 100755
-index c2e25c1..0000000
---- a/re2/re2/make_unicode_groups.py
-+++ /dev/null
-@@ -1,111 +0,0 @@
--#!/usr/bin/python
--# Copyright 2008 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--"""Generate C++ tables for Unicode Script and Category groups."""
--
--import sys
--import unicode
--
--_header = """
--// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
--// make_unicode_groups.py >unicode_groups.cc
--
--#include "re2/unicode_groups.h"
--
--namespace re2 {
--
--"""
--
--_trailer = """
--
--} // namespace re2
--
--"""
--
--n16 = 0
--n32 = 0
--
--def MakeRanges(codes):
-- """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]]"""
-- ranges = []
-- last = -100
-- for c in codes:
-- if c == last+1:
-- ranges[-1][1] = c
-- else:
-- ranges.append([c, c])
-- last = c
-- return ranges
--
--def PrintRanges(type, name, ranges):
-- """Print the ranges as an array of type named name."""
-- print "static %s %s[] = {" % (type, name,)
-- for lo, hi in ranges:
-- print "\t{ %d, %d }," % (lo, hi)
-- print "};"
--
--# def PrintCodes(type, name, codes):
--# """Print the codes as an array of type named name."""
--# print "static %s %s[] = {" % (type, name,)
--# for c in codes:
--# print "\t%d," % (c,)
--# print "};"
--
--def PrintGroup(name, codes):
-- """Print the data structures for the group of codes.
-- Return a UGroup literal for the group."""
--
-- # See unicode_groups.h for a description of the data structure.
--
-- # Split codes into 16-bit ranges and 32-bit ranges.
-- range16 = MakeRanges([c for c in codes if c < 65536])
-- range32 = MakeRanges([c for c in codes if c >= 65536])
--
-- # Pull singleton ranges out of range16.
-- # code16 = [lo for lo, hi in range16 if lo == hi]
-- # range16 = [[lo, hi] for lo, hi in range16 if lo != hi]
--
-- global n16
-- global n32
-- n16 += len(range16)
-- n32 += len(range32)
--
-- ugroup = "{ \"%s\", +1" % (name,)
-- # if len(code16) > 0:
-- # PrintCodes("uint16", name+"_code16", code16)
-- # ugroup += ", %s_code16, %d" % (name, len(code16))
-- # else:
-- # ugroup += ", 0, 0"
-- if len(range16) > 0:
-- PrintRanges("URange16", name+"_range16", range16)
-- ugroup += ", %s_range16, %d" % (name, len(range16))
-- else:
-- ugroup += ", 0, 0"
-- if len(range32) > 0:
-- PrintRanges("URange32", name+"_range32", range32)
-- ugroup += ", %s_range32, %d" % (name, len(range32))
-- else:
-- ugroup += ", 0, 0"
-- ugroup += " }"
-- return ugroup
--
--def main():
-- print _header
-- ugroups = []
-- for name, codes in unicode.Categories().iteritems():
-- ugroups.append(PrintGroup(name, codes))
-- for name, codes in unicode.Scripts().iteritems():
-- ugroups.append(PrintGroup(name, codes))
-- print "// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32)
-- print "UGroup unicode_groups[] = {";
-- ugroups.sort()
-- for ug in ugroups:
-- print "\t%s," % (ug,)
-- print "};"
-- print "int num_unicode_groups = %d;" % (len(ugroups),)
-- print _trailer
--
--if __name__ == '__main__':
-- main()
-diff --git a/re2/re2/mimics_pcre.cc b/re2/re2/mimics_pcre.cc
-deleted file mode 100644
-index fc6dd4a..0000000
---- a/re2/re2/mimics_pcre.cc
-+++ /dev/null
-@@ -1,185 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Determine whether this library should match PCRE exactly
--// for a particular Regexp. (If so, the testing framework can
--// check that it does.)
--//
--// This library matches PCRE except in these cases:
--// * the regexp contains a repetition of an empty string,
--// like (a*)* or (a*)+. In this case, PCRE will treat
--// the repetition sequence as ending with an empty string,
--// while this library does not.
--// * Perl and PCRE differ on whether \v matches \n.
--// For historical reasons, this library implements the Perl behavior.
--// * Perl and PCRE allow $ in one-line mode to match either the very
--// end of the text or just before a \n at the end of the text.
--// This library requires it to match only the end of the text.
--// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
--// match the end of the text if the last character is a \n.
--// This library does allow it.
--//
--// Regexp::MimicsPCRE checks for any of these conditions.
--
--#include "util/util.h"
--#include "re2/regexp.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--// Returns whether re might match an empty string.
--static bool CanBeEmptyString(Regexp *re);
--
--// Walker class to compute whether library handles a regexp
--// exactly as PCRE would. See comment at top for conditions.
--
--class PCREWalker : public Regexp::Walker<bool> {
-- public:
-- PCREWalker() {}
-- bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
-- int nchild_args);
--
-- bool ShortVisit(Regexp* re, bool a) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
-- return a;
-- }
--};
--
--// Called after visiting each of re's children and accumulating
--// the return values in child_args. So child_args contains whether
--// this library mimics PCRE for those subexpressions.
--bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-- bool* child_args, int nchild_args) {
-- // If children failed, so do we.
-- for (int i = 0; i < nchild_args; i++)
-- if (!child_args[i])
-- return false;
--
-- // Otherwise look for other reasons to fail.
-- switch (re->op()) {
-- // Look for repeated empty string.
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- if (CanBeEmptyString(re->sub()[0]))
-- return false;
-- break;
-- case kRegexpRepeat:
-- if (re->max() == -1 && CanBeEmptyString(re->sub()[0]))
-- return false;
-- break;
--
-- // Look for \v
-- case kRegexpLiteral:
-- if (re->rune() == '\v')
-- return false;
-- break;
--
-- // Look for $ in single-line mode.
-- case kRegexpEndText:
-- case kRegexpEmptyMatch:
-- if (re->parse_flags() & Regexp::WasDollar)
-- return false;
-- break;
--
-- // Look for ^ in multi-line mode.
-- case kRegexpBeginLine:
-- // No condition: in single-line mode ^ becomes kRegexpBeginText.
-- return false;
--
-- default:
-- break;
-- }
--
-- // Not proven guilty.
-- return true;
--}
--
--// Returns whether this regexp's behavior will mimic PCRE's exactly.
--bool Regexp::MimicsPCRE() {
-- PCREWalker w;
-- return w.Walk(this, true);
--}
--
--
--// Walker class to compute whether a Regexp can match an empty string.
--// It is okay to overestimate. For example, \b\B cannot match an empty
--// string, because \b and \B are mutually exclusive, but this isn't
--// that smart and will say it can. Spurious empty strings
--// will reduce the number of regexps we sanity check against PCRE,
--// but they won't break anything.
--
--class EmptyStringWalker : public Regexp::Walker<bool> {
-- public:
-- EmptyStringWalker() { }
-- bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-- bool* child_args, int nchild_args);
--
-- bool ShortVisit(Regexp* re, bool a) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
-- return a;
-- }
--
-- private:
-- DISALLOW_EVIL_CONSTRUCTORS(EmptyStringWalker);
--};
--
--// Called after visiting re's children. child_args contains the return
--// value from each of the children's PostVisits (i.e., whether each child
--// can match an empty string). Returns whether this clause can match an
--// empty string.
--bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-- bool* child_args, int nchild_args) {
-- switch (re->op()) {
-- case kRegexpNoMatch: // never empty
-- case kRegexpLiteral:
-- case kRegexpAnyChar:
-- case kRegexpAnyByte:
-- case kRegexpCharClass:
-- case kRegexpLiteralString:
-- return false;
--
-- case kRegexpEmptyMatch: // always empty
-- case kRegexpBeginLine: // always empty, when they match
-- case kRegexpEndLine:
-- case kRegexpNoWordBoundary:
-- case kRegexpWordBoundary:
-- case kRegexpBeginText:
-- case kRegexpEndText:
-- case kRegexpStar: // can always be empty
-- case kRegexpQuest:
-- case kRegexpHaveMatch:
-- return true;
--
-- case kRegexpConcat: // can be empty if all children can
-- for (int i = 0; i < nchild_args; i++)
-- if (!child_args[i])
-- return false;
-- return true;
--
-- case kRegexpAlternate: // can be empty if any child can
-- for (int i = 0; i < nchild_args; i++)
-- if (child_args[i])
-- return true;
-- return false;
--
-- case kRegexpPlus: // can be empty if the child can
-- case kRegexpCapture:
-- return child_args[0];
--
-- case kRegexpRepeat: // can be empty if child can or is x{0}
-- return child_args[0] || re->min() == 0;
-- }
-- return false;
--}
--
--// Returns whether re can match an empty string.
--static bool CanBeEmptyString(Regexp* re) {
-- EmptyStringWalker w;
-- return w.Walk(re, true);
--}
--
--} // namespace re2
-diff --git a/re2/re2/nfa.cc b/re2/re2/nfa.cc
-deleted file mode 100644
-index 61a4ecf..0000000
---- a/re2/re2/nfa.cc
-+++ /dev/null
-@@ -1,709 +0,0 @@
--// Copyright 2006-2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Tested by search_test.cc.
--//
--// Prog::SearchNFA, an NFA search.
--// This is an actual NFA like the theorists talk about,
--// not the pseudo-NFA found in backtracking regexp implementations.
--//
--// IMPLEMENTATION
--//
--// This algorithm is a variant of one that appeared in Rob Pike's sam editor,
--// which is a variant of the one described in Thompson's 1968 CACM paper.
--// See http://swtch.com/~rsc/regexp/ for various history. The main feature
--// over the DFA implementation is that it tracks submatch boundaries.
--//
--// When the choice of submatch boundaries is ambiguous, this particular
--// implementation makes the same choices that traditional backtracking
--// implementations (in particular, Perl and PCRE) do.
--// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential
--// time in the length of the input.
--//
--// Like Thompson's original machine and like the DFA implementation, this
--// implementation notices a match only once it is one byte past it.
--
--#include "re2/prog.h"
--#include "re2/regexp.h"
--#include "util/sparse_array.h"
--#include "util/sparse_set.h"
--
--namespace re2 {
--
--class NFA {
-- public:
-- NFA(Prog* prog);
-- ~NFA();
--
-- // Searches for a matching string.
-- // * If anchored is true, only considers matches starting at offset.
-- // Otherwise finds lefmost match at or after offset.
-- // * If longest is true, returns the longest match starting
-- // at the chosen start point. Otherwise returns the so-called
-- // left-biased match, the one traditional backtracking engines
-- // (like Perl and PCRE) find.
-- // Records submatch boundaries in submatch[1..nsubmatch-1].
-- // Submatch[0] is the entire match. When there is a choice in
-- // which text matches each subexpression, the submatch boundaries
-- // are chosen to match what a backtracking implementation would choose.
-- bool Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch);
--
-- static const int Debug = 0;
--
-- private:
-- struct Thread {
-- union {
-- int id;
-- Thread* next; // when on free list
-- };
-- const char** capture;
-- };
--
-- // State for explicit stack in AddToThreadq.
-- struct AddState {
-- int id; // Inst to process
-- int j;
-- const char* cap_j; // if j>=0, set capture[j] = cap_j before processing ip
--
-- AddState()
-- : id(0), j(-1), cap_j(NULL) {}
-- explicit AddState(int id)
-- : id(id), j(-1), cap_j(NULL) {}
-- AddState(int id, const char* cap_j, int j)
-- : id(id), j(j), cap_j(cap_j) {}
-- };
--
-- // Threadq is a list of threads. The list is sorted by the order
-- // in which Perl would explore that particular state -- the earlier
-- // choices appear earlier in the list.
-- typedef SparseArray<Thread*> Threadq;
--
-- inline Thread* AllocThread();
-- inline void FreeThread(Thread*);
--
-- // Add r (or its children, following unlabeled arrows)
-- // to the workqueue q with associated capture info.
-- void AddToThreadq(Threadq* q, int id, int flag,
-- const char* p, const char** capture);
--
-- // Run runq on byte c, appending new states to nextq.
-- // Updates matched_ and match_ as new, better matches are found.
-- // p is position of the next byte (the one after c)
-- // in the input string, used when processing capturing parens.
-- // flag is the bitwise or of Bol, Eol, etc., specifying whether
-- // ^, $ and \b match the current input point (after c).
-- inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p);
--
-- // Returns text version of capture information, for debugging.
-- string FormatCapture(const char** capture);
--
-- inline void CopyCapture(const char** dst, const char** src);
--
-- // Computes whether all matches must begin with the same first
-- // byte, and if so, returns that byte. If not, returns -1.
-- int ComputeFirstByte();
--
-- Prog* prog_; // underlying program
-- int start_; // start instruction in program
-- int ncapture_; // number of submatches to track
-- bool longest_; // whether searching for longest match
-- bool endmatch_; // whether match must end at text.end()
-- const char* btext_; // beginning of text being matched (for FormatSubmatch)
-- const char* etext_; // end of text being matched (for endmatch_)
-- Threadq q0_, q1_; // pre-allocated for Search.
-- const char** match_; // best match so far
-- bool matched_; // any match so far?
-- AddState* astack_; // pre-allocated for AddToThreadq
-- int nastack_;
-- int first_byte_; // required first byte for match, or -1 if none
--
-- Thread* free_threads_; // free list
--
-- DISALLOW_EVIL_CONSTRUCTORS(NFA);
--};
--
--NFA::NFA(Prog* prog) {
-- prog_ = prog;
-- start_ = prog->start();
-- ncapture_ = 0;
-- longest_ = false;
-- endmatch_ = false;
-- btext_ = NULL;
-- etext_ = NULL;
-- q0_.resize(prog_->size());
-- q1_.resize(prog_->size());
-- nastack_ = 2*prog_->size();
-- astack_ = new AddState[nastack_];
-- match_ = NULL;
-- matched_ = false;
-- free_threads_ = NULL;
-- first_byte_ = ComputeFirstByte();
--}
--
--NFA::~NFA() {
-- delete[] match_;
-- delete[] astack_;
-- Thread* next;
-- for (Thread* t = free_threads_; t; t = next) {
-- next = t->next;
-- delete[] t->capture;
-- delete t;
-- }
--}
--
--void NFA::FreeThread(Thread *t) {
-- if (t == NULL)
-- return;
-- t->next = free_threads_;
-- free_threads_ = t;
--}
--
--NFA::Thread* NFA::AllocThread() {
-- Thread* t = free_threads_;
-- if (t == NULL) {
-- t = new Thread;
-- t->capture = new const char*[ncapture_];
-- return t;
-- }
-- free_threads_ = t->next;
-- return t;
--}
--
--void NFA::CopyCapture(const char** dst, const char** src) {
-- for (int i = 0; i < ncapture_; i+=2) {
-- dst[i] = src[i];
-- dst[i+1] = src[i+1];
-- }
--}
--
--// Follows all empty arrows from r and enqueues all the states reached.
--// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
--// The pointer p is the current input position, and m is the
--// current set of match boundaries.
--void NFA::AddToThreadq(Threadq* q, int id0, int flag,
-- const char* p, const char** capture) {
-- if (id0 == 0)
-- return;
--
-- // Astack_ is pre-allocated to avoid resize operations.
-- // It has room for 2*prog_->size() entries, which is enough:
-- // Each inst in prog can be processed at most once,
-- // pushing at most two entries on stk.
--
-- int nstk = 0;
-- AddState* stk = astack_;
-- stk[nstk++] = AddState(id0);
--
-- while (nstk > 0) {
-- DCHECK_LE(nstk, nastack_);
-- const AddState& a = stk[--nstk];
-- if (a.j >= 0)
-- capture[a.j] = a.cap_j;
--
-- int id = a.id;
-- if (id == 0)
-- continue;
-- if (q->has_index(id)) {
-- if (Debug)
-- fprintf(stderr, " [%d%s]\n", id, FormatCapture(capture).c_str());
-- continue;
-- }
--
-- // Create entry in q no matter what. We might fill it in below,
-- // or we might not. Even if not, it is necessary to have it,
-- // so that we don't revisit r during the recursion.
-- q->set_new(id, NULL);
--
-- Thread** tp = &q->find(id)->second;
-- int j;
-- Thread* t;
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- default:
-- LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
-- break;
--
-- case kInstFail:
-- break;
--
-- case kInstAltMatch:
-- // Save state; will pick up at next byte.
-- t = AllocThread();
-- t->id = id;
-- CopyCapture(t->capture, capture);
-- *tp = t;
-- // fall through
--
-- case kInstAlt:
-- // Explore alternatives.
-- stk[nstk++] = AddState(ip->out1());
-- stk[nstk++] = AddState(ip->out());
-- break;
--
-- case kInstNop:
-- // Continue on.
-- stk[nstk++] = AddState(ip->out());
-- break;
--
-- case kInstCapture:
-- if ((j=ip->cap()) < ncapture_) {
-- // Push a dummy whose only job is to restore capture[j]
-- // once we finish exploring this possibility.
-- stk[nstk++] = AddState(0, capture[j], j);
--
-- // Record capture.
-- capture[j] = p;
-- }
-- stk[nstk++] = AddState(ip->out());
-- break;
--
-- case kInstMatch:
-- case kInstByteRange:
-- // Save state; will pick up at next byte.
-- t = AllocThread();
-- t->id = id;
-- CopyCapture(t->capture, capture);
-- *tp = t;
-- if (Debug)
-- fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t);
-- break;
--
-- case kInstEmptyWidth:
-- // Continue on if we have all the right flag bits.
-- if (ip->empty() & ~flag)
-- break;
-- stk[nstk++] = AddState(ip->out());
-- break;
-- }
-- }
--}
--
--// Run runq on byte c, appending new states to nextq.
--// Updates match as new, better matches are found.
--// p is position of the byte c in the input string,
--// used when processing capturing parens.
--// flag is the bitwise or of Bol, Eol, etc., specifying whether
--// ^, $ and \b match the current input point (after c).
--// Frees all the threads on runq.
--// If there is a shortcut to the end, returns that shortcut.
--int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
-- nextq->clear();
--
-- for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
-- Thread* t = i->second;
-- if (t == NULL)
-- continue;
--
-- if (longest_) {
-- // Can skip any threads started after our current best match.
-- if (matched_ && match_[0] < t->capture[0]) {
-- FreeThread(t);
-- continue;
-- }
-- }
--
-- int id = t->id;
-- Prog::Inst* ip = prog_->inst(id);
--
-- switch (ip->opcode()) {
-- default:
-- // Should only see the values handled below.
-- LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
-- break;
--
-- case kInstByteRange:
-- if (ip->Matches(c))
-- AddToThreadq(nextq, ip->out(), flag, p+1, t->capture);
-- break;
--
-- case kInstAltMatch:
-- if (i != runq->begin())
-- break;
-- // The match is ours if we want it.
-- if (ip->greedy(prog_) || longest_) {
-- CopyCapture((const char**)match_, t->capture);
-- FreeThread(t);
-- for (++i; i != runq->end(); ++i)
-- FreeThread(i->second);
-- runq->clear();
-- matched_ = true;
-- if (ip->greedy(prog_))
-- return ip->out1();
-- return ip->out();
-- }
-- break;
--
-- case kInstMatch:
-- if (endmatch_ && p != etext_)
-- break;
--
-- const char* old = t->capture[1]; // previous end pointer
-- t->capture[1] = p;
-- if (longest_) {
-- // Leftmost-longest mode: save this match only if
-- // it is either farther to the left or at the same
-- // point but longer than an existing match.
-- if (!matched_ || t->capture[0] < match_[0] ||
-- (t->capture[0] == match_[0] && t->capture[1] > match_[1]))
-- CopyCapture((const char**)match_, t->capture);
-- } else {
-- // Leftmost-biased mode: this match is by definition
-- // better than what we've already found (see next line).
-- CopyCapture((const char**)match_, t->capture);
--
-- // Cut off the threads that can only find matches
-- // worse than the one we just found: don't run the
-- // rest of the current Threadq.
-- t->capture[0] = old;
-- FreeThread(t);
-- for (++i; i != runq->end(); ++i)
-- FreeThread(i->second);
-- runq->clear();
-- matched_ = true;
-- return 0;
-- }
-- t->capture[0] = old;
-- matched_ = true;
-- break;
-- }
-- FreeThread(t);
-- }
-- runq->clear();
-- return 0;
--}
--
--string NFA::FormatCapture(const char** capture) {
-- string s;
--
-- for (int i = 0; i < ncapture_; i+=2) {
-- if (capture[i] == NULL)
-- StringAppendF(&s, "(?,?)");
-- else if (capture[i+1] == NULL)
-- StringAppendF(&s, "(%d,?)", (int)(capture[i] - btext_));
-- else
-- StringAppendF(&s, "(%d,%d)",
-- (int)(capture[i] - btext_),
-- (int)(capture[i+1] - btext_));
-- }
-- return s;
--}
--
--// Returns whether haystack contains needle's memory.
--static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) {
-- return haystack.begin() <= needle.begin() &&
-- haystack.end() >= needle.end();
--}
--
--bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch) {
-- if (start_ == 0)
-- return false;
--
-- StringPiece context = const_context;
-- if (context.begin() == NULL)
-- context = text;
--
-- if (!StringPieceContains(context, text)) {
-- LOG(FATAL) << "Bad args: context does not contain text "
-- << reinterpret_cast<const void*>(context.begin())
-- << "+" << context.size() << " "
-- << reinterpret_cast<const void*>(text.begin())
-- << "+" << text.size();
-- return false;
-- }
--
-- if (prog_->anchor_start() && context.begin() != text.begin())
-- return false;
-- if (prog_->anchor_end() && context.end() != text.end())
-- return false;
-- anchored |= prog_->anchor_start();
-- if (prog_->anchor_end()) {
-- longest = true;
-- endmatch_ = true;
-- etext_ = text.end();
-- }
--
-- if (nsubmatch < 0) {
-- LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
-- return false;
-- }
--
-- // Save search parameters.
-- ncapture_ = 2*nsubmatch;
-- longest_ = longest;
--
-- if (nsubmatch == 0) {
-- // We need to maintain match[0], both to distinguish the
-- // longest match (if longest is true) and also to tell
-- // whether we've seen any matches at all.
-- ncapture_ = 2;
-- }
--
-- match_ = new const char*[ncapture_];
-- matched_ = false;
-- memset(match_, 0, ncapture_*sizeof match_[0]);
--
-- // For debugging prints.
-- btext_ = context.begin();
--
-- if (Debug) {
-- fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
-- text.as_string().c_str(), context.as_string().c_str(), anchored,
-- longest);
-- }
--
-- // Set up search.
-- Threadq* runq = &q0_;
-- Threadq* nextq = &q1_;
-- runq->clear();
-- nextq->clear();
-- memset(&match_[0], 0, ncapture_*sizeof match_[0]);
-- const char* bp = context.begin();
-- int c = -1;
-- int wasword = 0;
--
-- if (text.begin() > context.begin()) {
-- c = text.begin()[-1] & 0xFF;
-- wasword = Prog::IsWordChar(c);
-- }
--
-- // Loop over the text, stepping the machine.
-- for (const char* p = text.begin();; p++) {
-- // Check for empty-width specials.
-- int flag = 0;
--
-- // ^ and \A
-- if (p == context.begin())
-- flag |= kEmptyBeginText | kEmptyBeginLine;
-- else if (p <= context.end() && p[-1] == '\n')
-- flag |= kEmptyBeginLine;
--
-- // $ and \z
-- if (p == context.end())
-- flag |= kEmptyEndText | kEmptyEndLine;
-- else if (p < context.end() && p[0] == '\n')
-- flag |= kEmptyEndLine;
--
-- // \b and \B
-- int isword = 0;
-- if (p < context.end())
-- isword = Prog::IsWordChar(p[0] & 0xFF);
--
-- if (isword != wasword)
-- flag |= kEmptyWordBoundary;
-- else
-- flag |= kEmptyNonWordBoundary;
--
-- if (Debug) {
-- fprintf(stderr, "%c[%#x/%d/%d]:", p > text.end() ? '$' : p == bp ? '^' : c, flag, isword, wasword);
-- for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
-- Thread* t = i->second;
-- if (t == NULL)
-- continue;
-- fprintf(stderr, " %d%s", t->id,
-- FormatCapture((const char**)t->capture).c_str());
-- }
-- fprintf(stderr, "\n");
-- }
--
-- // Process previous character (waited until now to avoid
-- // repeating the flag computation above).
-- // This is a no-op the first time around the loop, because
-- // runq is empty.
-- int id = Step(runq, nextq, c, flag, p-1);
-- DCHECK_EQ(runq->size(), 0);
-- swap(nextq, runq);
-- nextq->clear();
-- if (id != 0) {
-- // We're done: full match ahead.
-- p = text.end();
-- for (;;) {
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- default:
-- LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
-- break;
--
-- case kInstCapture:
-- match_[ip->cap()] = p;
-- id = ip->out();
-- continue;
--
-- case kInstNop:
-- id = ip->out();
-- continue;
--
-- case kInstMatch:
-- match_[1] = p;
-- matched_ = true;
-- break;
--
-- case kInstEmptyWidth:
-- if (ip->empty() & ~(kEmptyEndLine|kEmptyEndText)) {
-- LOG(DFATAL) << "Unexpected empty-width in short circuit: " << ip->empty();
-- break;
-- }
-- id = ip->out();
-- continue;
-- }
-- break;
-- }
-- break;
-- }
--
-- if (p > text.end())
-- break;
--
-- // Start a new thread if there have not been any matches.
-- // (No point in starting a new thread if there have been
-- // matches, since it would be to the right of the match
-- // we already found.)
-- if (!matched_ && (!anchored || p == text.begin())) {
-- // If there's a required first byte for an unanchored search
-- // and we're not in the middle of any possible matches,
-- // use memchr to search for the byte quickly.
-- if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
-- p < text.end() && (p[0] & 0xFF) != first_byte_) {
-- p = reinterpret_cast<const char*>(memchr(p, first_byte_,
-- text.end() - p));
-- if (p == NULL) {
-- p = text.end();
-- isword = 0;
-- } else {
-- isword = Prog::IsWordChar(p[0] & 0xFF);
-- }
-- flag = Prog::EmptyFlags(context, p);
-- }
--
-- // Steal match storage (cleared but unused as of yet)
-- // temporarily to hold match boundaries for new thread.
-- match_[0] = p;
-- AddToThreadq(runq, start_, flag, p, match_);
-- match_[0] = NULL;
-- }
--
-- // If all the threads have died, stop early.
-- if (runq->size() == 0) {
-- if (Debug)
-- fprintf(stderr, "dead\n");
-- break;
-- }
--
-- if (p == text.end())
-- c = 0;
-- else
-- c = *p & 0xFF;
-- wasword = isword;
--
-- // Will run step(runq, nextq, c, ...) on next iteration. See above.
-- }
--
-- for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
-- FreeThread(i->second);
--
-- if (matched_) {
-- for (int i = 0; i < nsubmatch; i++)
-- submatch[i].set(match_[2*i], match_[2*i+1] - match_[2*i]);
-- if (Debug)
-- fprintf(stderr, "match (%d,%d)\n",
-- static_cast<int>(match_[0] - btext_),
-- static_cast<int>(match_[1] - btext_));
-- return true;
-- }
-- VLOG(1) << "No matches found";
-- return false;
--}
--
--// Computes whether all successful matches have a common first byte,
--// and if so, returns that byte. If not, returns -1.
--int NFA::ComputeFirstByte() {
-- if (start_ == 0)
-- return -1;
--
-- int b = -1; // first byte, not yet computed
--
-- typedef SparseSet Workq;
-- Workq q(prog_->size());
-- q.insert(start_);
-- for (Workq::iterator it = q.begin(); it != q.end(); ++it) {
-- int id = *it;
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- default:
-- LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
-- break;
--
-- case kInstMatch:
-- // The empty string matches: no first byte.
-- return -1;
--
-- case kInstByteRange:
-- // Must match only a single byte
-- if (ip->lo() != ip->hi())
-- return -1;
-- if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
-- return -1;
-- // If we haven't seen any bytes yet, record it;
-- // otherwise must match the one we saw before.
-- if (b == -1)
-- b = ip->lo();
-- else if (b != ip->lo())
-- return -1;
-- break;
--
-- case kInstNop:
-- case kInstCapture:
-- case kInstEmptyWidth:
-- // Continue on.
-- // Ignore ip->empty() flags for kInstEmptyWidth
-- // in order to be as conservative as possible
-- // (assume all possible empty-width flags are true).
-- if (ip->out())
-- q.insert(ip->out());
-- break;
--
-- case kInstAlt:
-- case kInstAltMatch:
-- // Explore alternatives.
-- if (ip->out())
-- q.insert(ip->out());
-- if (ip->out1())
-- q.insert(ip->out1());
-- break;
--
-- case kInstFail:
-- break;
-- }
-- }
-- return b;
--}
--
--bool
--Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch) {
-- if (NFA::Debug)
-- Dump();
--
-- NFA nfa(this);
-- StringPiece sp;
-- if (kind == kFullMatch) {
-- anchor = kAnchored;
-- if (nmatch == 0) {
-- match = &sp;
-- nmatch = 1;
-- }
-- }
-- if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
-- return false;
-- if (kind == kFullMatch && match[0].end() != text.end())
-- return false;
-- return true;
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/onepass.cc b/re2/re2/onepass.cc
-deleted file mode 100644
-index 1c49988..0000000
---- a/re2/re2/onepass.cc
-+++ /dev/null
-@@ -1,614 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Tested by search_test.cc.
--//
--// Prog::SearchOnePass is an efficient implementation of
--// regular expression search with submatch tracking for
--// what I call "one-pass regular expressions". (An alternate
--// name might be "backtracking-free regular expressions".)
--//
--// One-pass regular expressions have the property that
--// at each input byte during an anchored match, there may be
--// multiple alternatives but only one can proceed for any
--// given input byte.
--//
--// For example, the regexp /x*yx*/ is one-pass: you read
--// x's until a y, then you read the y, then you keep reading x's.
--// At no point do you have to guess what to do or back up
--// and try a different guess.
--//
--// On the other hand, /x*x/ is not one-pass: when you're
--// looking at an input "x", it's not clear whether you should
--// use it to extend the x* or as the final x.
--//
--// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not.
--// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not.
--//
--// A simple intuition for identifying one-pass regular expressions
--// is that it's always immediately obvious when a repetition ends.
--// It must also be immediately obvious which branch of an | to take:
--//
--// /x(y|z)/ is one-pass, but /(xy|xz)/ is not.
--//
--// The NFA-based search in nfa.cc does some bookkeeping to
--// avoid the need for backtracking and its associated exponential blowup.
--// But if we have a one-pass regular expression, there is no
--// possibility of backtracking, so there is no need for the
--// extra bookkeeping. Hence, this code.
--//
--// On a one-pass regular expression, the NFA code in nfa.cc
--// runs at about 1/20 of the backtracking-based PCRE speed.
--// In contrast, the code in this file runs at about the same
--// speed as PCRE.
--//
--// One-pass regular expressions get used a lot when RE is
--// used for parsing simple strings, so it pays off to
--// notice them and handle them efficiently.
--//
--// See also Anne Brüggemann-Klein and Derick Wood,
--// "One-unambiguous regular languages", Information and Computation 142(2).
--
--#include <string.h>
--#include <map>
--#include "util/util.h"
--#include "util/arena.h"
--#include "util/sparse_set.h"
--#include "re2/prog.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--static const int Debug = 0;
--
--// The key insight behind this implementation is that the
--// non-determinism in an NFA for a one-pass regular expression
--// is contained. To explain what that means, first a
--// refresher about what regular expression programs look like
--// and how the usual NFA execution runs.
--//
--// In a regular expression program, only the kInstByteRange
--// instruction processes an input byte c and moves on to the
--// next byte in the string (it does so if c is in the given range).
--// The kInstByteRange instructions correspond to literal characters
--// and character classes in the regular expression.
--//
--// The kInstAlt instructions are used as wiring to connect the
--// kInstByteRange instructions together in interesting ways when
--// implementing | + and *.
--// The kInstAlt instruction forks execution, like a goto that
--// jumps to ip->out() and ip->out1() in parallel. Each of the
--// resulting computation paths is called a thread.
--//
--// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture --
--// are interesting in their own right but like kInstAlt they don't
--// advance the input pointer. Only kInstByteRange does.
--//
--// The automaton execution in nfa.cc runs all the possible
--// threads of execution in lock-step over the input. To process
--// a particular byte, each thread gets run until it either dies
--// or finds a kInstByteRange instruction matching the byte.
--// If the latter happens, the thread stops just past the
--// kInstByteRange instruction (at ip->out()) and waits for
--// the other threads to finish processing the input byte.
--// Then, once all the threads have processed that input byte,
--// the whole process repeats. The kInstAlt state instruction
--// might create new threads during input processing, but no
--// matter what, all the threads stop after a kInstByteRange
--// and wait for the other threads to "catch up".
--// Running in lock step like this ensures that the NFA reads
--// the input string only once.
--//
--// Each thread maintains its own set of capture registers
--// (the string positions at which it executed the kInstCapture
--// instructions corresponding to capturing parentheses in the
--// regular expression). Repeated copying of the capture registers
--// is the main performance bottleneck in the NFA implementation.
--//
--// A regular expression program is "one-pass" if, no matter what
--// the input string, there is only one thread that makes it
--// past a kInstByteRange instruction at each input byte. This means
--// that there is in some sense only one active thread throughout
--// the execution. Other threads might be created during the
--// processing of an input byte, but they are ephemeral: only one
--// thread is left to start processing the next input byte.
--// This is what I meant above when I said the non-determinism
--// was "contained".
--//
--// To execute a one-pass regular expression program, we can build
--// a DFA (no non-determinism) that has at most as many states as
--// the NFA (compare this to the possibly exponential number of states
--// in the general case). Each state records, for each possible
--// input byte, the next state along with the conditions required
--// before entering that state -- empty-width flags that must be true
--// and capture operations that must be performed. It also records
--// whether a set of conditions required to finish a match at that
--// point in the input rather than process the next byte.
--
--// A state in the one-pass NFA (aka DFA) - just an array of actions.
--struct OneState;
--
--// A state in the one-pass NFA - just an array of actions indexed
--// by the bytemap_[] of the next input byte. (The bytemap
--// maps next input bytes into equivalence classes, to reduce
--// the memory footprint.)
--struct OneState {
-- uint32 matchcond; // conditions to match right now.
-- uint32 action[1];
--};
--
--// The uint32 conditions in the action are a combination of
--// condition and capture bits and the next state. The bottom 16 bits
--// are the condition and capture bits, and the top 16 are the index of
--// the next state.
--//
--// Bits 0-5 are the empty-width flags from prog.h.
--// Bit 6 is kMatchWins, which means the match takes
--// priority over moving to next in a first-match search.
--// The remaining bits mark capture registers that should
--// be set to the current input position. The capture bits
--// start at index 2, since the search loop can take care of
--// cap[0], cap[1] (the overall match position).
--// That means we can handle up to 5 capturing parens: $1 through $4, plus $0.
--// No input position can satisfy both kEmptyWordBoundary
--// and kEmptyNonWordBoundary, so we can use that as a sentinel
--// instead of needing an extra bit.
--
--static const int kIndexShift = 16; // number of bits below index
--static const int kEmptyShift = 6; // number of empty flags in prog.h
--static const int kRealCapShift = kEmptyShift + 1;
--static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2;
--
--// Parameters used to skip over cap[0], cap[1].
--static const int kCapShift = kRealCapShift - 2;
--static const int kMaxCap = kRealMaxCap + 2;
--
--static const uint32 kMatchWins = 1 << kEmptyShift;
--static const uint32 kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift;
--
--static const uint32 kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;
--
--// Check, at compile time, that prog.h agrees with math above.
--// This function is never called.
--void OnePass_Checks() {
-- COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags,
-- kEmptyShift_disagrees_with_kEmptyAllFlags);
-- // kMaxCap counts pointers, kMaxOnePassCapture counts pairs.
-- COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2,
-- kMaxCap_disagrees_with_kMaxOnePassCapture);
--}
--
--static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) {
-- uint32 satisfied = Prog::EmptyFlags(context, p);
-- if (cond & kEmptyAllFlags & ~satisfied)
-- return false;
-- return true;
--}
--
--// Apply the capture bits in cond, saving p to the appropriate
--// locations in cap[].
--static void ApplyCaptures(uint32 cond, const char* p,
-- const char** cap, int ncap) {
-- for (int i = 2; i < ncap; i++)
-- if (cond & (1 << kCapShift << i))
-- cap[i] = p;
--}
--
--// Compute a node pointer.
--// Basically (OneState*)(nodes + statesize*nodeindex)
--// but the version with the C++ casts overflows 80 characters (and is ugly).
--static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
-- int nodeindex) {
-- return reinterpret_cast<OneState*>(
-- const_cast<uint8*>(nodes + statesize*nodeindex));
--}
--
--bool Prog::SearchOnePass(const StringPiece& text,
-- const StringPiece& const_context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch) {
-- if (anchor != kAnchored && kind != kFullMatch) {
-- LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
-- return false;
-- }
--
-- // Make sure we have at least cap[1],
-- // because we use it to tell if we matched.
-- int ncap = 2*nmatch;
-- if (ncap < 2)
-- ncap = 2;
--
-- const char* cap[kMaxCap];
-- for (int i = 0; i < ncap; i++)
-- cap[i] = NULL;
--
-- const char* matchcap[kMaxCap];
-- for (int i = 0; i < ncap; i++)
-- matchcap[i] = NULL;
--
-- StringPiece context = const_context;
-- if (context.begin() == NULL)
-- context = text;
-- if (anchor_start() && context.begin() != text.begin())
-- return false;
-- if (anchor_end() && context.end() != text.end())
-- return false;
-- if (anchor_end())
-- kind = kFullMatch;
--
-- // State and act are marked volatile to
-- // keep the compiler from re-ordering the
-- // memory accesses walking over the NFA.
-- // This is worth about 5%.
-- volatile OneState* state = onepass_start_;
-- volatile uint8* nodes = onepass_nodes_;
-- volatile uint32 statesize = onepass_statesize_;
-- uint8* bytemap = bytemap_;
-- const char* bp = text.begin();
-- const char* ep = text.end();
-- const char* p;
-- bool matched = false;
-- matchcap[0] = bp;
-- cap[0] = bp;
-- uint32 nextmatchcond = state->matchcond;
-- for (p = bp; p < ep; p++) {
-- int c = bytemap[*p & 0xFF];
-- uint32 matchcond = nextmatchcond;
-- uint32 cond = state->action[c];
--
-- // Determine whether we can reach act->next.
-- // If so, advance state and nextmatchcond.
-- if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
-- uint32 nextindex = cond >> kIndexShift;
-- state = IndexToNode(nodes, statesize, nextindex);
-- nextmatchcond = state->matchcond;
-- } else {
-- state = NULL;
-- nextmatchcond = kImpossible;
-- }
--
-- // This code section is carefully tuned.
-- // The goto sequence is about 10% faster than the
-- // obvious rewrite as a large if statement in the
-- // ASCIIMatchRE2 and DotMatchRE2 benchmarks.
--
-- // Saving the match capture registers is expensive.
-- // Is this intermediate match worth thinking about?
--
-- // Not if we want a full match.
-- if (kind == kFullMatch)
-- goto skipmatch;
--
-- // Not if it's impossible.
-- if (matchcond == kImpossible)
-- goto skipmatch;
--
-- // Not if the possible match is beaten by the certain
-- // match at the next byte. When this test is useless
-- // (e.g., HTTPPartialMatchRE2) it slows the loop by
-- // about 10%, but when it avoids work (e.g., DotMatchRE2),
-- // it cuts the loop execution by about 45%.
-- if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
-- goto skipmatch;
--
-- // Finally, the match conditions must be satisfied.
-- if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
-- for (int i = 2; i < 2*nmatch; i++)
-- matchcap[i] = cap[i];
-- if (nmatch > 1 && (matchcond & kCapMask))
-- ApplyCaptures(matchcond, p, matchcap, ncap);
-- matchcap[1] = p;
-- matched = true;
--
-- // If we're in longest match mode, we have to keep
-- // going and see if we find a longer match.
-- // In first match mode, we can stop if the match
-- // takes priority over the next state for this input byte.
-- // That bit is per-input byte and thus in cond, not matchcond.
-- if (kind == kFirstMatch && (cond & kMatchWins))
-- goto done;
-- }
--
-- skipmatch:
-- if (state == NULL)
-- goto done;
-- if ((cond & kCapMask) && nmatch > 1)
-- ApplyCaptures(cond, p, cap, ncap);
-- }
--
-- // Look for match at end of input.
-- {
-- uint32 matchcond = state->matchcond;
-- if (matchcond != kImpossible &&
-- ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
-- if (nmatch > 1 && (matchcond & kCapMask))
-- ApplyCaptures(matchcond, p, cap, ncap);
-- for (int i = 2; i < ncap; i++)
-- matchcap[i] = cap[i];
-- matchcap[1] = p;
-- matched = true;
-- }
-- }
--
--done:
-- if (!matched)
-- return false;
-- for (int i = 0; i < nmatch; i++)
-- match[i].set(matchcap[2*i], matchcap[2*i+1] - matchcap[2*i]);
-- return true;
--}
--
--
--// Analysis to determine whether a given regexp program is one-pass.
--
--// If ip is not on workq, adds ip to work queue and returns true.
--// If ip is already on work queue, does nothing and returns false.
--// If ip is NULL, does nothing and returns true (pretends to add it).
--typedef SparseSet Instq;
--static bool AddQ(Instq *q, int id) {
-- if (id == 0)
-- return true;
-- if (q->contains(id))
-- return false;
-- q->insert(id);
-- return true;
--}
--
--struct InstCond {
-- int id;
-- uint32 cond;
--};
--
--// Returns whether this is a one-pass program; that is,
--// returns whether it is safe to use SearchOnePass on this program.
--// These conditions must be true for any instruction ip:
--//
--// (1) for any other Inst nip, there is at most one input-free
--// path from ip to nip.
--// (2) there is at most one kInstByte instruction reachable from
--// ip that matches any particular byte c.
--// (3) there is at most one input-free path from ip to a kInstMatch
--// instruction.
--//
--// This is actually just a conservative approximation: it might
--// return false when the answer is true, when kInstEmptyWidth
--// instructions are involved.
--// Constructs and saves corresponding one-pass NFA on success.
--bool Prog::IsOnePass() {
-- if (did_onepass_)
-- return onepass_start_ != NULL;
-- did_onepass_ = true;
--
-- if (start() == 0) // no match
-- return false;
--
-- // Steal memory for the one-pass NFA from the overall DFA budget.
-- // Willing to use at most 1/4 of the DFA budget (heuristic).
-- // Limit max node count to 65000 as a conservative estimate to
-- // avoid overflowing 16-bit node index in encoding.
-- int maxnodes = 2 + byte_inst_count_;
-- int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32);
-- if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
-- return false;
--
-- // Flood the graph starting at the start state, and check
-- // that in each reachable state, each possible byte leads
-- // to a unique next state.
-- int size = this->size();
-- InstCond *stack = new InstCond[size];
--
-- int* nodebyid = new int[size]; // indexed by ip
-- memset(nodebyid, 0xFF, size*sizeof nodebyid[0]);
--
-- uint8* nodes = new uint8[maxnodes*statesize];
-- uint8* nodep = nodes;
--
-- Instq tovisit(size), workq(size);
-- AddQ(&tovisit, start());
-- nodebyid[start()] = 0;
-- nodep += statesize;
-- int nalloc = 1;
-- for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
-- int id = *it;
-- int nodeindex = nodebyid[id];
-- OneState* node = IndexToNode(nodes, statesize, nodeindex);
--
-- // Flood graph using manual stack, filling in actions as found.
-- // Default is none.
-- for (int b = 0; b < bytemap_range_; b++)
-- node->action[b] = kImpossible;
-- node->matchcond = kImpossible;
--
-- workq.clear();
-- bool matched = false;
-- int nstack = 0;
-- stack[nstack].id = id;
-- stack[nstack++].cond = 0;
-- while (nstack > 0) {
-- int id = stack[--nstack].id;
-- Prog::Inst* ip = inst(id);
-- uint32 cond = stack[nstack].cond;
-- switch (ip->opcode()) {
-- case kInstAltMatch:
-- // TODO(rsc): Ignoring kInstAltMatch optimization.
-- // Should implement it in this engine, but it's subtle.
-- // Fall through.
-- case kInstAlt:
-- // If already on work queue, (1) is violated: bail out.
-- if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1()))
-- goto fail;
-- stack[nstack].id = ip->out1();
-- stack[nstack++].cond = cond;
-- stack[nstack].id = ip->out();
-- stack[nstack++].cond = cond;
-- break;
--
-- case kInstByteRange: {
-- int nextindex = nodebyid[ip->out()];
-- if (nextindex == -1) {
-- if (nalloc >= maxnodes) {
-- if (Debug)
-- LOG(ERROR)
-- << StringPrintf("Not OnePass: hit node limit %d > %d",
-- nalloc, maxnodes);
-- goto fail;
-- }
-- nextindex = nalloc;
-- nodep += statesize;
-- nodebyid[ip->out()] = nextindex;
-- nalloc++;
-- AddQ(&tovisit, ip->out());
-- }
-- if (matched)
-- cond |= kMatchWins;
-- for (int c = ip->lo(); c <= ip->hi(); c++) {
-- int b = bytemap_[c];
-- c = unbytemap_[b]; // last c in byte class
-- uint32 act = node->action[b];
-- uint32 newact = (nextindex << kIndexShift) | cond;
-- if ((act & kImpossible) == kImpossible) {
-- node->action[b] = newact;
-- } else if (act != newact) {
-- if (Debug) {
-- LOG(ERROR)
-- << StringPrintf("Not OnePass: conflict on byte "
-- "%#x at state %d",
-- c, *it);
-- }
-- goto fail;
-- }
-- }
-- if (ip->foldcase()) {
-- Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a';
-- Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a';
-- for (int c = lo; c <= hi; c++) {
-- int b = bytemap_[c];
-- c = unbytemap_[b]; // last c in class
-- uint32 act = node->action[b];
-- uint32 newact = (nextindex << kIndexShift) | cond;
-- if ((act & kImpossible) == kImpossible) {
-- node->action[b] = newact;
-- } else if (act != newact) {
-- if (Debug) {
-- LOG(ERROR)
-- << StringPrintf("Not OnePass: conflict on byte "
-- "%#x at state %d",
-- c, *it);
-- }
-- goto fail;
-- }
-- }
-- }
-- break;
-- }
--
-- case kInstCapture:
-- if (ip->cap() < kMaxCap)
-- cond |= (1 << kCapShift) << ip->cap();
-- goto QueueEmpty;
--
-- case kInstEmptyWidth:
-- cond |= ip->empty();
-- goto QueueEmpty;
--
-- case kInstNop:
-- QueueEmpty:
-- // kInstCapture and kInstNop always proceed to ip->out().
-- // kInstEmptyWidth only sometimes proceeds to ip->out(),
-- // but as a conservative approximation we assume it always does.
-- // We could be a little more precise by looking at what c
-- // is, but that seems like overkill.
--
-- // If already on work queue, (1) is violated: bail out.
-- if (!AddQ(&workq, ip->out())) {
-- if (Debug) {
-- LOG(ERROR) << StringPrintf("Not OnePass: multiple paths"
-- " %d -> %d\n",
-- *it, ip->out());
-- }
-- goto fail;
-- }
-- stack[nstack].id = ip->out();
-- stack[nstack++].cond = cond;
-- break;
--
-- case kInstMatch:
-- if (matched) {
-- // (3) is violated
-- if (Debug) {
-- LOG(ERROR) << StringPrintf("Not OnePass: multiple matches"
-- " from %d\n", *it);
-- }
-- goto fail;
-- }
-- matched = true;
-- node->matchcond = cond;
-- break;
--
-- case kInstFail:
-- break;
-- }
-- }
-- }
--
-- if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR).
-- string dump = "prog dump:\n" + Dump() + "node dump\n";
-- map<int, int> idmap;
-- for (int i = 0; i < size; i++)
-- if (nodebyid[i] != -1)
-- idmap[nodebyid[i]] = i;
--
-- StringAppendF(&dump, "byte ranges:\n");
-- int i = 0;
-- for (int b = 0; b < bytemap_range_; b++) {
-- int lo = i;
-- while (bytemap_[i] == b)
-- i++;
-- StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1);
-- }
--
-- for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
-- int id = *it;
-- int nodeindex = nodebyid[id];
-- if (nodeindex == -1)
-- continue;
-- OneState* node = IndexToNode(nodes, statesize, nodeindex);
-- string s;
-- StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n",
-- nodeindex, id, node->matchcond);
-- for (int i = 0; i < bytemap_range_; i++) {
-- if ((node->action[i] & kImpossible) == kImpossible)
-- continue;
-- StringAppendF(&dump, " %d cond %#x -> %d id=%d\n",
-- i, node->action[i] & 0xFFFF,
-- node->action[i] >> kIndexShift,
-- idmap[node->action[i] >> kIndexShift]);
-- }
-- }
-- LOG(ERROR) << dump;
-- }
--
-- // Overallocated earlier; cut down to actual size.
-- nodep = new uint8[nalloc*statesize];
-- memmove(nodep, nodes, nalloc*statesize);
-- delete[] nodes;
-- nodes = nodep;
--
-- onepass_start_ = IndexToNode(nodes, statesize, nodebyid[start()]);
-- onepass_nodes_ = nodes;
-- onepass_statesize_ = statesize;
-- dfa_mem_ -= nalloc*statesize;
--
-- delete[] stack;
-- delete[] nodebyid;
-- return true;
--
--fail:
-- delete[] stack;
-- delete[] nodebyid;
-- delete[] nodes;
-- return false;
--}
--
--} // namespace re2
-diff --git a/re2/re2/parse.cc b/re2/re2/parse.cc
-deleted file mode 100644
-index 034b133..0000000
---- a/re2/re2/parse.cc
-+++ /dev/null
-@@ -1,2202 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression parser.
--
--// The parser is a simple precedence-based parser with a
--// manual stack. The parsing work is done by the methods
--// of the ParseState class. The Regexp::Parse function is
--// essentially just a lexer that calls the ParseState method
--// for each token.
--
--// The parser recognizes POSIX extended regular expressions
--// excluding backreferences, collating elements, and collating
--// classes. It also allows the empty string as a regular expression
--// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
--// See regexp.h for rationale.
--
--#include "util/util.h"
--#include "re2/regexp.h"
--#include "re2/stringpiece.h"
--#include "re2/unicode_casefold.h"
--#include "re2/unicode_groups.h"
--
--namespace re2 {
--
--// Regular expression parse state.
--// The list of parsed regexps so far is maintained as a vector of
--// Regexp pointers called the stack. Left parenthesis and vertical
--// bar markers are also placed on the stack, as Regexps with
--// non-standard opcodes.
--// Scanning a left parenthesis causes the parser to push a left parenthesis
--// marker on the stack.
--// Scanning a vertical bar causes the parser to pop the stack until it finds a
--// vertical bar or left parenthesis marker (not popping the marker),
--// concatenate all the popped results, and push them back on
--// the stack (DoConcatenation).
--// Scanning a right parenthesis causes the parser to act as though it
--// has seen a vertical bar, which then leaves the top of the stack in the
--// form LeftParen regexp VerticalBar regexp VerticalBar ... regexp VerticalBar.
--// The parser pops all this off the stack and creates an alternation of the
--// regexps (DoAlternation).
--
--class Regexp::ParseState {
-- public:
-- ParseState(ParseFlags flags, const StringPiece& whole_regexp,
-- RegexpStatus* status);
-- ~ParseState();
--
-- ParseFlags flags() { return flags_; }
-- int rune_max() { return rune_max_; }
--
-- // Parse methods. All public methods return a bool saying
-- // whether parsing should continue. If a method returns
-- // false, it has set fields in *status_, and the parser
-- // should return NULL.
--
-- // Pushes the given regular expression onto the stack.
-- // Could check for too much memory used here.
-- bool PushRegexp(Regexp* re);
--
-- // Pushes the literal rune r onto the stack.
-- bool PushLiteral(Rune r);
--
-- // Pushes a regexp with the given op (and no args) onto the stack.
-- bool PushSimpleOp(RegexpOp op);
--
-- // Pushes a ^ onto the stack.
-- bool PushCarat();
--
-- // Pushes a \b (word == true) or \B (word == false) onto the stack.
-- bool PushWordBoundary(bool word);
--
-- // Pushes a $ onto the stack.
-- bool PushDollar();
--
-- // Pushes a . onto the stack
-- bool PushDot();
--
-- // Pushes a repeat operator regexp onto the stack.
-- // A valid argument for the operator must already be on the stack.
-- // s is the name of the operator, for use in error messages.
-- bool PushRepeatOp(RegexpOp op, const StringPiece& s, bool nongreedy);
--
-- // Pushes a repetition regexp onto the stack.
-- // A valid argument for the operator must already be on the stack.
-- bool PushRepetition(int min, int max, const StringPiece& s, bool nongreedy);
--
-- // Checks whether a particular regexp op is a marker.
-- bool IsMarker(RegexpOp op);
--
-- // Processes a left parenthesis in the input.
-- // Pushes a marker onto the stack.
-- bool DoLeftParen(const StringPiece& name);
-- bool DoLeftParenNoCapture();
--
-- // Processes a vertical bar in the input.
-- bool DoVerticalBar();
--
-- // Processes a right parenthesis in the input.
-- bool DoRightParen();
--
-- // Processes the end of input, returning the final regexp.
-- Regexp* DoFinish();
--
-- // Finishes the regexp if necessary, preparing it for use
-- // in a more complicated expression.
-- // If it is a CharClassBuilder, converts into a CharClass.
-- Regexp* FinishRegexp(Regexp*);
--
-- // These routines don't manipulate the parse stack
-- // directly, but they do need to look at flags_.
-- // ParseCharClass also manipulates the internals of Regexp
-- // while creating *out_re.
--
-- // Parse a character class into *out_re.
-- // Removes parsed text from s.
-- bool ParseCharClass(StringPiece* s, Regexp** out_re,
-- RegexpStatus* status);
--
-- // Parse a character class character into *rp.
-- // Removes parsed text from s.
-- bool ParseCCCharacter(StringPiece* s, Rune *rp,
-- const StringPiece& whole_class,
-- RegexpStatus* status);
--
-- // Parse a character class range into rr.
-- // Removes parsed text from s.
-- bool ParseCCRange(StringPiece* s, RuneRange* rr,
-- const StringPiece& whole_class,
-- RegexpStatus* status);
--
-- // Parse a Perl flag set or non-capturing group from s.
-- bool ParsePerlFlags(StringPiece* s);
--
--
-- // Finishes the current concatenation,
-- // collapsing it into a single regexp on the stack.
-- void DoConcatenation();
--
-- // Finishes the current alternation,
-- // collapsing it to a single regexp on the stack.
-- void DoAlternation();
--
-- // Generalized DoAlternation/DoConcatenation.
-- void DoCollapse(RegexpOp op);
--
-- // Maybe concatenate Literals into LiteralString.
-- bool MaybeConcatString(int r, ParseFlags flags);
--
--private:
-- ParseFlags flags_;
-- StringPiece whole_regexp_;
-- RegexpStatus* status_;
-- Regexp* stacktop_;
-- int ncap_; // number of capturing parens seen
-- int rune_max_; // maximum char value for this encoding
--
-- DISALLOW_EVIL_CONSTRUCTORS(ParseState);
--};
--
--// Pseudo-operators - only on parse stack.
--const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1);
--const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2);
--
--Regexp::ParseState::ParseState(ParseFlags flags,
-- const StringPiece& whole_regexp,
-- RegexpStatus* status)
-- : flags_(flags), whole_regexp_(whole_regexp),
-- status_(status), stacktop_(NULL), ncap_(0) {
-- if (flags_ & Latin1)
-- rune_max_ = 0xFF;
-- else
-- rune_max_ = Runemax;
--}
--
--// Cleans up by freeing all the regexps on the stack.
--Regexp::ParseState::~ParseState() {
-- Regexp* next;
-- for (Regexp* re = stacktop_; re != NULL; re = next) {
-- next = re->down_;
-- re->down_ = NULL;
-- if (re->op() == kLeftParen)
-- delete re->name_;
-- re->Decref();
-- }
--}
--
--// Finishes the regexp if necessary, preparing it for use in
--// a more complex expression.
--// If it is a CharClassBuilder, converts into a CharClass.
--Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) {
-- if (re == NULL)
-- return NULL;
-- re->down_ = NULL;
--
-- if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) {
-- CharClassBuilder* ccb = re->ccb_;
-- re->ccb_ = NULL;
-- re->cc_ = ccb->GetCharClass();
-- delete ccb;
-- }
--
-- return re;
--}
--
--// Pushes the given regular expression onto the stack.
--// Could check for too much memory used here.
--bool Regexp::ParseState::PushRegexp(Regexp* re) {
-- MaybeConcatString(-1, NoParseFlags);
--
-- // Special case: a character class of one character is just
-- // a literal. This is a common idiom for escaping
-- // single characters (e.g., [.] instead of \.), and some
-- // analysis does better with fewer character classes.
-- // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding.
-- if (re->op_ == kRegexpCharClass) {
-- if (re->ccb_->size() == 1) {
-- Rune r = re->ccb_->begin()->lo;
-- re->Decref();
-- re = new Regexp(kRegexpLiteral, flags_);
-- re->rune_ = r;
-- } else if (re->ccb_->size() == 2) {
-- Rune r = re->ccb_->begin()->lo;
-- if ('A' <= r && r <= 'Z' && re->ccb_->Contains(r + 'a' - 'A')) {
-- re->Decref();
-- re = new Regexp(kRegexpLiteral, flags_ | FoldCase);
-- re->rune_ = r + 'a' - 'A';
-- }
-- }
-- }
--
-- if (!IsMarker(re->op()))
-- re->simple_ = re->ComputeSimple();
-- re->down_ = stacktop_;
-- stacktop_ = re;
-- return true;
--}
--
--// Searches the case folding tables and returns the CaseFold* that contains r.
--// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r.
--// If there isn't one, returns NULL.
--CaseFold* LookupCaseFold(CaseFold *f, int n, Rune r) {
-- CaseFold* ef = f + n;
--
-- // Binary search for entry containing r.
-- while (n > 0) {
-- int m = n/2;
-- if (f[m].lo <= r && r <= f[m].hi)
-- return &f[m];
-- if (r < f[m].lo) {
-- n = m;
-- } else {
-- f += m+1;
-- n -= m+1;
-- }
-- }
--
-- // There is no entry that contains r, but f points
-- // where it would have been. Unless f points at
-- // the end of the array, it points at the next entry
-- // after r.
-- if (f < ef)
-- return f;
--
-- // No entry contains r; no entry contains runes > r.
-- return NULL;
--}
--
--// Returns the result of applying the fold f to the rune r.
--Rune ApplyFold(CaseFold *f, Rune r) {
-- switch (f->delta) {
-- default:
-- return r + f->delta;
--
-- case EvenOddSkip: // even <-> odd but only applies to every other
-- if ((r - f->lo) % 2)
-- return r;
-- // fall through
-- case EvenOdd: // even <-> odd
-- if (r%2 == 0)
-- return r + 1;
-- return r - 1;
--
-- case OddEvenSkip: // odd <-> even but only applies to every other
-- if ((r - f->lo) % 2)
-- return r;
-- // fall through
-- case OddEven: // odd <-> even
-- if (r%2 == 1)
-- return r + 1;
-- return r - 1;
-- }
--}
--
--// Returns the next Rune in r's folding cycle (see unicode_casefold.h).
--// Examples:
--// CycleFoldRune('A') = 'a'
--// CycleFoldRune('a') = 'A'
--//
--// CycleFoldRune('K') = 'k'
--// CycleFoldRune('k') = 0x212A (Kelvin)
--// CycleFoldRune(0x212A) = 'K'
--//
--// CycleFoldRune('?') = '?'
--Rune CycleFoldRune(Rune r) {
-- CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r);
-- if (f == NULL || r < f->lo)
-- return r;
-- return ApplyFold(f, r);
--}
--
--// Add lo-hi to the class, along with their fold-equivalent characters.
--// If lo-hi is already in the class, assume that the fold-equivalent
--// chars are there too, so there's no work to do.
--static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
-- // AddFoldedRange calls itself recursively for each rune in the fold cycle.
-- // Most folding cycles are small: there aren't any bigger than four in the
-- // current Unicode tables. make_unicode_casefold.py checks that
-- // the cycles are not too long, and we double-check here using depth.
-- if (depth > 10) {
-- LOG(DFATAL) << "AddFoldedRange recurses too much.";
-- return;
-- }
--
-- if (!cc->AddRange(lo, hi)) // lo-hi was already there? we're done
-- return;
--
-- while (lo <= hi) {
-- CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo);
-- if (f == NULL) // lo has no fold, nor does anything above lo
-- break;
-- if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo
-- lo = f->lo;
-- continue;
-- }
--
-- // Add in the result of folding the range lo - f->hi
-- // and that range's fold, recursively.
-- Rune lo1 = lo;
-- Rune hi1 = min<Rune>(hi, f->hi);
-- switch (f->delta) {
-- default:
-- lo1 += f->delta;
-- hi1 += f->delta;
-- break;
-- case EvenOdd:
-- if (lo1%2 == 1)
-- lo1--;
-- if (hi1%2 == 0)
-- hi1++;
-- break;
-- case OddEven:
-- if (lo1%2 == 0)
-- lo1--;
-- if (hi1%2 == 1)
-- hi1++;
-- break;
-- }
-- AddFoldedRange(cc, lo1, hi1, depth+1);
--
-- // Pick up where this fold left off.
-- lo = f->hi + 1;
-- }
--}
--
--// Pushes the literal rune r onto the stack.
--bool Regexp::ParseState::PushLiteral(Rune r) {
-- // Do case folding if needed.
-- if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {
-- Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
-- re->ccb_ = new CharClassBuilder;
-- Rune r1 = r;
-- do {
-- if (!(flags_ & NeverNL) || r != '\n') {
-- re->ccb_->AddRange(r, r);
-- }
-- r = CycleFoldRune(r);
-- } while (r != r1);
-- re->ccb_->RemoveAbove(rune_max_);
-- return PushRegexp(re);
-- }
--
-- // Exclude newline if applicable.
-- if ((flags_ & NeverNL) && r == '\n')
-- return PushRegexp(new Regexp(kRegexpNoMatch, flags_));
--
-- // No fancy stuff worked. Ordinary literal.
-- if (MaybeConcatString(r, flags_))
-- return true;
--
-- Regexp* re = new Regexp(kRegexpLiteral, flags_);
-- re->rune_ = r;
-- return PushRegexp(re);
--}
--
--// Pushes a ^ onto the stack.
--bool Regexp::ParseState::PushCarat() {
-- if (flags_ & OneLine) {
-- return PushSimpleOp(kRegexpBeginText);
-- }
-- return PushSimpleOp(kRegexpBeginLine);
--}
--
--// Pushes a \b or \B onto the stack.
--bool Regexp::ParseState::PushWordBoundary(bool word) {
-- if (word)
-- return PushSimpleOp(kRegexpWordBoundary);
-- return PushSimpleOp(kRegexpNoWordBoundary);
--}
--
--// Pushes a $ onto the stack.
--bool Regexp::ParseState::PushDollar() {
-- if (flags_ & OneLine) {
-- // Clumsy marker so that MimicsPCRE() can tell whether
-- // this kRegexpEndText was a $ and not a \z.
-- Regexp::ParseFlags oflags = flags_;
-- flags_ = flags_ | WasDollar;
-- bool ret = PushSimpleOp(kRegexpEndText);
-- flags_ = oflags;
-- return ret;
-- }
-- return PushSimpleOp(kRegexpEndLine);
--}
--
--// Pushes a . onto the stack.
--bool Regexp::ParseState::PushDot() {
-- if ((flags_ & DotNL) && !(flags_ & NeverNL))
-- return PushSimpleOp(kRegexpAnyChar);
-- // Rewrite . into [^\n]
-- Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
-- re->ccb_ = new CharClassBuilder;
-- re->ccb_->AddRange(0, '\n' - 1);
-- re->ccb_->AddRange('\n' + 1, rune_max_);
-- return PushRegexp(re);
--}
--
--// Pushes a regexp with the given op (and no args) onto the stack.
--bool Regexp::ParseState::PushSimpleOp(RegexpOp op) {
-- Regexp* re = new Regexp(op, flags_);
-- return PushRegexp(re);
--}
--
--// Pushes a repeat operator regexp onto the stack.
--// A valid argument for the operator must already be on the stack.
--// The char c is the name of the operator, for use in error messages.
--bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,
-- bool nongreedy) {
-- if (stacktop_ == NULL || IsMarker(stacktop_->op())) {
-- status_->set_code(kRegexpRepeatArgument);
-- status_->set_error_arg(s);
-- return false;
-- }
-- Regexp::ParseFlags fl = flags_;
-- if (nongreedy)
-- fl = fl ^ NonGreedy;
-- Regexp* re = new Regexp(op, fl);
-- re->AllocSub(1);
-- re->down_ = stacktop_->down_;
-- re->sub()[0] = FinishRegexp(stacktop_);
-- re->simple_ = re->ComputeSimple();
-- stacktop_ = re;
-- return true;
--}
--
--// Pushes a repetition regexp onto the stack.
--// A valid argument for the operator must already be on the stack.
--bool Regexp::ParseState::PushRepetition(int min, int max,
-- const StringPiece& s,
-- bool nongreedy) {
-- if ((max != -1 && max < min) || min > 1000 || max > 1000) {
-- status_->set_code(kRegexpRepeatSize);
-- status_->set_error_arg(s);
-- return false;
-- }
-- if (stacktop_ == NULL || IsMarker(stacktop_->op())) {
-- status_->set_code(kRegexpRepeatArgument);
-- status_->set_error_arg(s);
-- return false;
-- }
-- Regexp::ParseFlags fl = flags_;
-- if (nongreedy)
-- fl = fl ^ NonGreedy;
-- Regexp* re = new Regexp(kRegexpRepeat, fl);
-- re->min_ = min;
-- re->max_ = max;
-- re->AllocSub(1);
-- re->down_ = stacktop_->down_;
-- re->sub()[0] = FinishRegexp(stacktop_);
-- re->simple_ = re->ComputeSimple();
--
-- stacktop_ = re;
-- return true;
--}
--
--// Checks whether a particular regexp op is a marker.
--bool Regexp::ParseState::IsMarker(RegexpOp op) {
-- return op >= kLeftParen;
--}
--
--// Processes a left parenthesis in the input.
--// Pushes a marker onto the stack.
--bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
-- Regexp* re = new Regexp(kLeftParen, flags_);
-- re->cap_ = ++ncap_;
-- if (name.data() != NULL)
-- re->name_ = new string(name.as_string());
-- return PushRegexp(re);
--}
--
--// Pushes a non-capturing marker onto the stack.
--bool Regexp::ParseState::DoLeftParenNoCapture() {
-- Regexp* re = new Regexp(kLeftParen, flags_);
-- re->cap_ = -1;
-- return PushRegexp(re);
--}
--
--// Adds r to cc, along with r's upper case if foldascii is set.
--static void AddLiteral(CharClassBuilder* cc, Rune r, bool foldascii) {
-- cc->AddRange(r, r);
-- if (foldascii && 'a' <= r && r <= 'z')
-- cc->AddRange(r + 'A' - 'a', r + 'A' - 'a');
--}
--
--// Processes a vertical bar in the input.
--bool Regexp::ParseState::DoVerticalBar() {
-- MaybeConcatString(-1, NoParseFlags);
-- DoConcatenation();
--
-- // Below the vertical bar is a list to alternate.
-- // Above the vertical bar is a list to concatenate.
-- // We just did the concatenation, so either swap
-- // the result below the vertical bar or push a new
-- // vertical bar on the stack.
-- Regexp* r1;
-- Regexp* r2;
-- if ((r1 = stacktop_) != NULL &&
-- (r2 = stacktop_->down_) != NULL &&
-- r2->op() == kVerticalBar) {
-- // If above and below vertical bar are literal or char class,
-- // can merge into a single char class.
-- Regexp* r3;
-- if ((r1->op() == kRegexpLiteral ||
-- r1->op() == kRegexpCharClass ||
-- r1->op() == kRegexpAnyChar) &&
-- (r3 = r2->down_) != NULL) {
-- Rune rune;
-- switch (r3->op()) {
-- case kRegexpLiteral: // convert to char class
-- rune = r3->rune_;
-- r3->op_ = kRegexpCharClass;
-- r3->cc_ = NULL;
-- r3->ccb_ = new CharClassBuilder;
-- AddLiteral(r3->ccb_, rune, r3->parse_flags_ & Regexp::FoldCase);
-- // fall through
-- case kRegexpCharClass:
-- if (r1->op() == kRegexpLiteral)
-- AddLiteral(r3->ccb_, r1->rune_,
-- r1->parse_flags_ & Regexp::FoldCase);
-- else if (r1->op() == kRegexpCharClass)
-- r3->ccb_->AddCharClass(r1->ccb_);
-- if (r1->op() == kRegexpAnyChar || r3->ccb_->full()) {
-- delete r3->ccb_;
-- r3->ccb_ = NULL;
-- r3->op_ = kRegexpAnyChar;
-- }
-- // fall through
-- case kRegexpAnyChar:
-- // pop r1
-- stacktop_ = r2;
-- r1->Decref();
-- return true;
-- default:
-- break;
-- }
-- }
--
-- // Swap r1 below vertical bar (r2).
-- r1->down_ = r2->down_;
-- r2->down_ = r1;
-- stacktop_ = r2;
-- return true;
-- }
-- return PushSimpleOp(kVerticalBar);
--}
--
--// Processes a right parenthesis in the input.
--bool Regexp::ParseState::DoRightParen() {
-- // Finish the current concatenation and alternation.
-- DoAlternation();
--
-- // The stack should be: LeftParen regexp
-- // Remove the LeftParen, leaving the regexp,
-- // parenthesized.
-- Regexp* r1;
-- Regexp* r2;
-- if ((r1 = stacktop_) == NULL ||
-- (r2 = r1->down_) == NULL ||
-- r2->op() != kLeftParen) {
-- status_->set_code(kRegexpMissingParen);
-- status_->set_error_arg(whole_regexp_);
-- return false;
-- }
--
-- // Pop off r1, r2. Will Decref or reuse below.
-- stacktop_ = r2->down_;
--
-- // Restore flags from when paren opened.
-- Regexp* re = r2;
-- flags_ = re->parse_flags();
--
-- // Rewrite LeftParen as capture if needed.
-- if (re->cap_ > 0) {
-- re->op_ = kRegexpCapture;
-- // re->cap_ is already set
-- re->AllocSub(1);
-- re->sub()[0] = FinishRegexp(r1);
-- re->simple_ = re->ComputeSimple();
-- } else {
-- re->Decref();
-- re = r1;
-- }
-- return PushRegexp(re);
--}
--
--// Processes the end of input, returning the final regexp.
--Regexp* Regexp::ParseState::DoFinish() {
-- DoAlternation();
-- Regexp* re = stacktop_;
-- if (re != NULL && re->down_ != NULL) {
-- status_->set_code(kRegexpMissingParen);
-- status_->set_error_arg(whole_regexp_);
-- return NULL;
-- }
-- stacktop_ = NULL;
-- return FinishRegexp(re);
--}
--
--// Returns the leading regexp that re starts with: sub()[0] for a concat of two or more pieces, re itself otherwise, or NULL when that piece is an empty match.
--// The returned Regexp* points into a piece of re,
--// so it must not be used after the caller calls re->Decref().
--Regexp* Regexp::LeadingRegexp(Regexp* re) {
-- if (re->op() == kRegexpEmptyMatch)
-- return NULL;
-- if (re->op() == kRegexpConcat && re->nsub() >= 2) {
-- Regexp** sub = re->sub();
-- if (sub[0]->op() == kRegexpEmptyMatch)
-- return NULL;
-- return sub[0];
-- }
-- return re; // not a multi-piece concat: re is its own leading regexp
--}
--
--// Removes LeadingRegexp(re) from re and returns what's left (re unchanged when it has no leading regexp, i.e. LeadingRegexp(re) would be NULL).
--// Consumes the reference to re and may edit it in place.
--// If caller wants to hold on to LeadingRegexp(re),
--// must have already Incref'ed it.
--Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) {
-- if (re->op() == kRegexpEmptyMatch)
-- return re;
-- if (re->op() == kRegexpConcat && re->nsub() >= 2) {
-- Regexp** sub = re->sub();
-- if (sub[0]->op() == kRegexpEmptyMatch)
-- return re;
-- sub[0]->Decref();
-- sub[0] = NULL;
-- if (re->nsub() == 2) {
-- // Collapse concatenation to single regexp.
-- Regexp* nre = sub[1];
-- sub[1] = NULL; // detach nre from re before dropping re
-- re->Decref();
-- return nre;
-- }
-- // 3 or more -> 2 or more.
-- re->nsub_--;
-- memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]);
-- return re;
-- }
-- Regexp::ParseFlags pf = re->parse_flags(); // re was its own leading regexp: replace it with an empty match carrying the same flags
-- re->Decref();
-- return new Regexp(kRegexpEmptyMatch, pf);
--}
--
--// Returns the leading string that re starts with, storing its length in *nrune and its FoldCase bit in *flags; returns NULL with *nrune == 0 if re does not start with a literal.
--// The returned Rune* points into a piece of re,
--// so it must not be used after the caller calls re->Decref().
--Rune* Regexp::LeadingString(Regexp* re, int *nrune,
-- Regexp::ParseFlags *flags) {
-- while (re->op() == kRegexpConcat && re->nsub() > 0) // descend to the first non-concat piece
-- re = re->sub()[0];
--
-- *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase);
--
-- if (re->op() == kRegexpLiteral) {
-- *nrune = 1;
-- return &re->rune_;
-- }
--
-- if (re->op() == kRegexpLiteralString) {
-- *nrune = re->nrunes_;
-- return re->runes_;
-- }
--
-- *nrune = 0;
-- return NULL;
--}
--
--// Removes the first n leading runes from the beginning of re (a single leading literal is removed whole, whatever n is).
--// Edits re in place.
--void Regexp::RemoveLeadingString(Regexp* re, int n) {
-- // Chase down concats to find first string.
-- // For regexps generated by parser, nested concats are
-- // flattened except when doing so would overflow the 16-bit
-- // limit on the size of a concatenation, so we should never
-- // see more than two here.
-- Regexp* stk[4];
-- int d = 0;
-- while (re->op() == kRegexpConcat) {
-- if (d < arraysize(stk)) // concats nested deeper than stk are not recorded, so they are not simplified below
-- stk[d++] = re;
-- re = re->sub()[0];
-- }
--
-- // Remove leading string from re.
-- if (re->op() == kRegexpLiteral) {
-- re->rune_ = 0;
-- re->op_ = kRegexpEmptyMatch;
-- } else if (re->op() == kRegexpLiteralString) {
-- if (n >= re->nrunes_) {
-- delete[] re->runes_;
-- re->runes_ = NULL;
-- re->nrunes_ = 0;
-- re->op_ = kRegexpEmptyMatch;
-- } else if (n == re->nrunes_ - 1) { // exactly one rune survives: demote the string to a single literal
-- Rune rune = re->runes_[re->nrunes_ - 1];
-- delete[] re->runes_;
-- re->runes_ = NULL;
-- re->nrunes_ = 0;
-- re->rune_ = rune;
-- re->op_ = kRegexpLiteral;
-- } else {
-- re->nrunes_ -= n;
-- memmove(re->runes_, re->runes_ + n, re->nrunes_ * sizeof re->runes_[0]);
-- }
-- }
--
-- // If re is now empty, concatenations might simplify too.
-- while (d-- > 0) { // walk the recorded concats from innermost to outermost
-- re = stk[d];
-- Regexp** sub = re->sub();
-- if (sub[0]->op() == kRegexpEmptyMatch) {
-- sub[0]->Decref();
-- sub[0] = NULL;
-- // Delete first element of concat.
-- switch (re->nsub()) {
-- case 0:
-- case 1:
-- // Impossible.
-- LOG(DFATAL) << "Concat of " << re->nsub();
-- re->submany_ = NULL;
-- re->op_ = kRegexpEmptyMatch;
-- break;
--
-- case 2: {
-- // Replace re with sub[1].
-- Regexp* old = sub[1];
-- sub[1] = NULL;
-- re->Swap(old);
-- old->Decref();
-- break;
-- }
--
-- default:
-- // Slide down.
-- re->nsub_--;
-- memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]);
-- break;
-- }
-- }
-- }
--}
--
--// Factors common prefixes from alternation.
--// For example,
--// ABC|ABD|AEF|BCX|BCY
--// simplifies to
--// A(B(C|D)|EF)|BC(X|Y)
--// which the normal parse state routines will further simplify to
--// A(B[CD]|EF)|BC[XY]
--//
--// Rewrites sub to contain simplified list to alternate and returns
--// the new length of sub. Adjusts reference counts accordingly
--// (incoming sub[i] decremented, outgoing sub[i] incremented).
--
--// It's too much of a pain to write this code with an explicit stack,
--// so instead we let the caller specify a maximum depth and
--// don't simplify beyond that. There are around 15 words of local
--// variables and parameters in the frame, so allowing 8 levels
--// on a 64-bit machine is still less than a kilobyte of stack and
--// probably enough benefit for practical uses.
--const int kFactorAlternationMaxDepth = 8;
--
--int Regexp::FactorAlternation(
-- Regexp** sub, int n,
-- Regexp::ParseFlags altflags) {
-- return FactorAlternationRecursive(sub, n, altflags, // entry point: start the recursion with the full depth budget
-- kFactorAlternationMaxDepth);
--}
--
--int Regexp::FactorAlternationRecursive(
-- Regexp** sub, int n,
-- Regexp::ParseFlags altflags,
-- int maxdepth) { // remaining recursion budget; at 0 or below sub is returned untouched
--
-- if (maxdepth <= 0)
-- return n;
--
-- // Round 1: Factor out common literal prefixes.
-- Rune *rune = NULL;
-- int nrune = 0;
-- Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
-- int start = 0;
-- int out = 0;
-- for (int i = 0; i <= n; i++) { // i == n is an extra pass that flushes the final run
-- // Invariant: what was in sub[0:start] has been Decref'ed
-- // and that space has been reused for sub[0:out] (out <= start).
-- //
-- // Invariant: sub[start:i] consists of regexps that all begin
-- // with the string rune[0:nrune].
--
-- Rune* rune_i = NULL;
-- int nrune_i = 0;
-- Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
-- if (i < n) {
-- rune_i = LeadingString(sub[i], &nrune_i, &runeflags_i);
-- if (runeflags_i == runeflags) {
-- int same = 0;
-- while (same < nrune && same < nrune_i && rune[same] == rune_i[same])
-- same++;
-- if (same > 0) {
-- // Matches at least one rune in current range. Keep going around.
-- nrune = same;
-- continue;
-- }
-- }
-- }
--
-- // Found end of a run with common leading literal string:
-- // sub[start:i] all begin with rune[0:nrune] but sub[i]
-- // does not even begin with rune[0].
-- //
-- // Factor out common string and append factored expression to sub[0:out].
-- if (i == start) {
-- // Nothing to do - first iteration.
-- } else if (i == start+1) {
-- // Just one: don't bother factoring.
-- sub[out++] = sub[start];
-- } else {
-- // Construct factored form: prefix(suffix1|suffix2|...)
-- Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
-- x[0] = LiteralString(rune, nrune, runeflags); // built first: rune points into sub[start], which is edited just below
-- for (int j = start; j < i; j++)
-- RemoveLeadingString(sub[j], nrune);
-- int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
-- maxdepth - 1);
-- x[1] = AlternateNoFactor(sub + start, nn, altflags);
-- sub[out++] = Concat(x, 2, altflags);
-- }
--
-- // Prepare for next round (if there is one).
-- if (i < n) {
-- start = i;
-- rune = rune_i;
-- nrune = nrune_i;
-- runeflags = runeflags_i;
-- }
-- }
-- n = out;
--
-- // Round 2: Factor out common complex prefixes,
-- // just the first piece of each concatenation,
-- // whatever it is. This is good enough a lot of the time.
-- start = 0;
-- out = 0;
-- Regexp* first = NULL;
-- for (int i = 0; i <= n; i++) {
-- // Invariant: what was in sub[0:start] has been Decref'ed
-- // and that space has been reused for sub[0:out] (out <= start).
-- //
-- // Invariant: sub[start:i] consists of regexps that all begin with first.
--
-- Regexp* first_i = NULL;
-- if (i < n) {
-- first_i = LeadingRegexp(sub[i]);
-- if (first != NULL && Regexp::Equal(first, first_i)) {
-- continue;
-- }
-- }
--
-- // Found end of a run with common leading regexp:
-- // sub[start:i] all begin with first but sub[i] does not.
-- //
-- // Factor out common regexp and append factored expression to sub[0:out].
-- if (i == start) {
-- // Nothing to do - first iteration.
-- } else if (i == start+1) {
-- // Just one: don't bother factoring.
-- sub[out++] = sub[start];
-- } else {
-- // Construct factored form: prefix(suffix1|suffix2|...)
-- Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
-- x[0] = first->Incref(); // take our own reference before the pieces holding first are rewritten
-- for (int j = start; j < i; j++)
-- sub[j] = RemoveLeadingRegexp(sub[j]);
-- int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
-- maxdepth - 1);
-- x[1] = AlternateNoFactor(sub + start, nn, altflags);
-- sub[out++] = Concat(x, 2, altflags);
-- }
--
-- // Prepare for next round (if there is one).
-- if (i < n) {
-- start = i;
-- first = first_i;
-- }
-- }
-- n = out;
--
-- // Round 3: Collapse runs of single literals into character classes.
-- start = 0;
-- out = 0;
-- for (int i = 0; i <= n; i++) {
-- // Invariant: what was in sub[0:start] has been Decref'ed
-- // and that space has been reused for sub[0:out] (out <= start).
-- //
-- // Invariant: sub[start:i] consists of regexps that are either
-- // literal runes or character classes.
--
-- if (i < n &&
-- (sub[i]->op() == kRegexpLiteral ||
-- sub[i]->op() == kRegexpCharClass))
-- continue;
--
-- // sub[i] is not a char or char class;
-- // emit char class for sub[start:i]...
-- if (i == start) {
-- // Nothing to do.
-- } else if (i == start+1) {
-- sub[out++] = sub[start];
-- } else {
-- // Make new char class.
-- CharClassBuilder ccb;
-- for (int j = start; j < i; j++) {
-- Regexp* re = sub[j];
-- if (re->op() == kRegexpCharClass) {
-- CharClass* cc = re->cc();
-- for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
-- ccb.AddRange(it->lo, it->hi);
-- } else if (re->op() == kRegexpLiteral) {
-- ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
-- } else {
-- LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
-- << re->ToString();
-- }
-- re->Decref();
-- }
-- sub[out++] = NewCharClass(ccb.GetCharClass(), altflags);
-- }
--
-- // ... and then emit sub[i].
-- if (i < n)
-- sub[out++] = sub[i];
-- start = i+1;
-- }
-- n = out;
--
-- // Round 4: Collapse runs of empty matches into single empty match.
-- start = 0; // not read again in this round
-- out = 0;
-- for (int i = 0; i < n; i++) {
-- if (i + 1 < n &&
-- sub[i]->op() == kRegexpEmptyMatch &&
-- sub[i+1]->op() == kRegexpEmptyMatch) {
-- sub[i]->Decref(); // drop this one; the last empty match of the run is the one kept
-- continue;
-- }
-- sub[out++] = sub[i];
-- }
-- n = out;
--
-- return n;
--}
--
--// Collapse the regexps on top of the stack, down to the
--// first marker, into a new op node (op == kRegexpAlternate
--// or op == kRegexpConcat). Entries that already have this op are flattened into the new node.
--void Regexp::ParseState::DoCollapse(RegexpOp op) {
-- // Scan backward to marker, counting children of composite.
-- int n = 0;
-- Regexp* next = NULL;
-- Regexp* sub;
-- for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
-- next = sub->down_;
-- if (sub->op_ == op)
-- n += sub->nsub_;
-- else
-- n++;
-- }
--
-- // If there's just one child, leave it alone.
-- // (Concat of one thing is that one thing; alternate of one thing is same.)
-- if (stacktop_ != NULL && stacktop_->down_ == next) // next is now the entry the scan stopped at
-- return;
--
-- // Construct op (alternation or concatenation), flattening op of op.
-- Regexp** subs = new Regexp*[n];
-- next = NULL;
-- int i = n; // filled from the back: the stack is walked top-down, i.e. last piece first
-- for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
-- next = sub->down_;
-- if (sub->op_ == op) {
-- Regexp** sub_subs = sub->sub();
-- for (int k = sub->nsub_ - 1; k >= 0; k--)
-- subs[--i] = sub_subs[k]->Incref();
-- sub->Decref();
-- } else {
-- subs[--i] = FinishRegexp(sub);
-- }
-- }
--
-- Regexp* re = ConcatOrAlternate(op, subs, n, flags_, true);
-- delete[] subs;
-- re->simple_ = re->ComputeSimple();
-- re->down_ = next; // link the new node above the marker (or NULL) where the scan stopped
-- stacktop_ = re;
--}
--
--// Finishes the current concatenation,
--// collapsing it into a single regexp on the stack (an empty match if there is nothing above the marker).
--void Regexp::ParseState::DoConcatenation() {
-- Regexp* r1 = stacktop_;
-- if (r1 == NULL || IsMarker(r1->op())) {
-- // empty concatenation is special case
-- Regexp* re = new Regexp(kRegexpEmptyMatch, flags_);
-- PushRegexp(re); // gives DoCollapse exactly one entry to leave in place
-- }
-- DoCollapse(kRegexpConcat);
--}
--
--// Finishes the current alternation,
--// collapsing it to a single regexp on the stack.
--void Regexp::ParseState::DoAlternation() {
-- DoVerticalBar();
-- // Now stack top is kVerticalBar.
-- Regexp* r1 = stacktop_;
-- stacktop_ = r1->down_; // pop the kVerticalBar entry itself; only the alternatives below it are collapsed
-- r1->Decref();
-- DoCollapse(kRegexpAlternate);
--}
--
--// Incremental conversion of concatenated literals into strings.
--// If top two elements on stack are both literal or string,
--// collapse into single string.
--// Don't walk down the stack -- the parser calls this frequently
--// enough that below the bottom two is known to be collapsed.
--// Only called when another regexp is about to be pushed
--// on the stack, so that the topmost literal is not being considered.
--// (Otherwise ab* would turn into (ab)*.)
--// If r >= 0, consider pushing a literal r on the stack.
--// Return whether that happened (true means the old top entry was reused to hold r with the given flags).
--bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
-- Regexp* re1;
-- Regexp* re2;
-- if ((re1 = stacktop_) == NULL || (re2 = re1->down_) == NULL)
-- return false;
--
-- if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString)
-- return false;
-- if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString)
-- return false;
-- if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase)) // only merge pieces that agree on case folding
-- return false;
--
-- if (re2->op_ == kRegexpLiteral) {
-- // convert into string
-- Rune rune = re2->rune_;
-- re2->op_ = kRegexpLiteralString;
-- re2->nrunes_ = 0;
-- re2->runes_ = NULL;
-- re2->AddRuneToString(rune);
-- }
--
-- // push re1 into re2.
-- if (re1->op_ == kRegexpLiteral) {
-- re2->AddRuneToString(re1->rune_);
-- } else {
-- for (int i = 0; i < re1->nrunes_; i++)
-- re2->AddRuneToString(re1->runes_[i]);
-- re1->nrunes_ = 0;
-- delete[] re1->runes_;
-- re1->runes_ = NULL;
-- }
--
-- // reuse re1 if possible
-- if (r >= 0) {
-- re1->op_ = kRegexpLiteral;
-- re1->rune_ = r;
-- re1->parse_flags_ = flags;
-- return true;
-- }
--
-- stacktop_ = re2; // nothing to push: pop the now-merged re1
-- re1->Decref();
-- return false;
--}
--
--// Lexing routines.
--
--// Parses a decimal integer, storing it in *np.
--// Sets *s to span the remainder of the string.
--// Returns false on a missing digit, a redundant leading zero, or a value that is too large (in that last case *s has already been advanced).
--static bool ParseInteger(StringPiece* s, int* np) {
-- if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
-- return false;
-- // Disallow leading zeros.
-- if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
-- return false;
-- int n = 0;
-- int c;
-- while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
-- // Avoid overflow.
-- if (n >= 100000000) // checked before multiplying by 10, so n*10 + 9 stays below 10^9
-- return false;
-- n = n*10 + c - '0';
-- s->remove_prefix(1); // digit
-- }
-- *np = n;
-- return true;
--}
--
--// Parses a repetition suffix like {1,2} or {2} or {2,}.
--// Sets *sp to span the remainder of the string on success.
--// Sets *lo and *hi to the given range.
--// In the case of {2,}, the high number is unbounded;
--// sets *hi to -1 to signify this.
--// {,2} is NOT a valid suffix.
--// The Maybe in the name signifies that the regexp parse
--// doesn't fail even if MaybeParseRepetition does, so the StringPiece
--// *sp must NOT be edited unless MaybeParseRepetition returns true.
--static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
-- StringPiece s = *sp; // work on a copy; *sp is only updated on success (*lo and *hi may still be written on failure)
-- if (s.size() == 0 || s[0] != '{')
-- return false;
-- s.remove_prefix(1); // '{'
-- if (!ParseInteger(&s, lo))
-- return false;
-- if (s.size() == 0)
-- return false;
-- if (s[0] == ',') {
-- s.remove_prefix(1); // ','
-- if (s.size() == 0)
-- return false;
-- if (s[0] == '}') {
-- // {2,} means at least 2
-- *hi = -1;
-- } else {
-- // {2,4} means 2, 3, or 4.
-- if (!ParseInteger(&s, hi))
-- return false;
-- }
-- } else {
-- // {2} means exactly two
-- *hi = *lo;
-- }
-- if (s.size() == 0 || s[0] != '}')
-- return false;
-- s.remove_prefix(1); // '}'
-- *sp = s;
-- return true;
--}
--
--// Removes the next Rune from the StringPiece and stores it in *r.
--// Returns number of bytes removed from sp, or -1 (never 0) with status set to kRegexpBadUTF8 on failure, so callers must test for < 0.
--// Behaves as though there is a terminating NUL at the end of sp.
--// Argument order is backwards from usual Google style
--// but consistent with chartorune.
--static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
-- int n;
-- if (fullrune(sp->data(), sp->size())) {
-- n = chartorune(r, sp->data());
-- if (!(n == 1 && *r == Runeerror)) { // no decoding error
-- sp->remove_prefix(n);
-- return n;
-- }
-- }
--
-- status->set_code(kRegexpBadUTF8); // sp is left unconsumed on this path
-- status->set_error_arg(NULL);
-- return -1;
--}
--
--// Return whether s is valid UTF-8.
--// If not, set status to kRegexpBadUTF8.
--static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
-- StringPiece t = s; // decode from a copy so s itself is not consumed
-- Rune r;
-- while (t.size() > 0) {
-- if (StringPieceToRune(&r, &t, status) < 0)
-- return false;
-- }
-- return true;
--}
--
--// Is c a hex digit (0-9, A-F or a-f)? Returns nonzero if so.
--static int IsHex(int c) {
-- return ('0' <= c && c <= '9') ||
-- ('A' <= c && c <= 'F') ||
-- ('a' <= c && c <= 'f');
--}
--
--// Convert hex digit to value (0-15). Callers are expected to check IsHex(c) first; anything else is logged and yields 0.
--static int UnHex(int c) {
-- if ('0' <= c && c <= '9')
-- return c - '0';
-- if ('A' <= c && c <= 'F')
-- return c - 'A' + 10;
-- if ('a' <= c && c <= 'f')
-- return c - 'a' + 10;
-- LOG(DFATAL) << "Bad hex digit " << c;
-- return 0;
--}
--
--// Parse an escape sequence (e.g., \n, \{).
--// Sets *s to span the remainder of the string.
--// Sets *rp to the named character. Returns false with status set on failure; rune_max bounds the value accepted by \x{...}.
--static bool ParseEscape(StringPiece* s, Rune* rp,
-- RegexpStatus* status, int rune_max) {
-- const char* begin = s->begin(); // start of the escape, kept for the kRegexpBadEscape error text
-- if (s->size() < 1 || (*s)[0] != '\\') {
-- // Should not happen - caller always checks.
-- status->set_code(kRegexpInternalError);
-- status->set_error_arg(NULL);
-- return false;
-- }
-- if (s->size() < 2) {
-- status->set_code(kRegexpTrailingBackslash);
-- status->set_error_arg(NULL);
-- return false;
-- }
-- Rune c, c1;
-- s->remove_prefix(1); // backslash
-- if (StringPieceToRune(&c, s, status) < 0)
-- return false;
-- int code;
-- switch (c) {
-- default:
-- if (c < Runeself && !isalpha(c) && !isdigit(c)) {
-- // Escaped non-word characters are always themselves.
-- // PCRE is not quite so rigorous: it accepts things like
-- // \q, but we don't. We once rejected \_, but too many
-- // programs and people insist on using it, so allow \_.
-- *rp = c;
-- return true;
-- }
-- goto BadEscape;
--
-- // Octal escapes.
-- case '1':
-- case '2':
-- case '3':
-- case '4':
-- case '5':
-- case '6':
-- case '7':
-- // Single non-zero octal digit is a backreference; not supported.
-- if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
-- goto BadEscape;
-- // fall through
-- case '0':
-- // consume up to three octal digits; already have one.
-- code = c - '0';
-- if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
-- code = code * 8 + c - '0';
-- s->remove_prefix(1); // digit
-- if (s->size() > 0) {
-- c = (*s)[0];
-- if ('0' <= c && c <= '7') {
-- code = code * 8 + c - '0';
-- s->remove_prefix(1); // digit
-- }
-- }
-- }
-- *rp = code;
-- return true;
--
-- // Hexadecimal escapes
-- case 'x':
-- if (s->size() == 0)
-- goto BadEscape;
-- if (StringPieceToRune(&c, s, status) < 0)
-- return false;
-- if (c == '{') {
-- // Any number of digits in braces.
-- // Update s as we consume the string, so that
-- // the whole thing gets shown in the error message.
-- // Perl accepts any text at all; it ignores all text
-- // after the first non-hex digit. We require only hex digits,
-- // and at least one.
-- if (StringPieceToRune(&c, s, status) < 0)
-- return false;
-- int nhex = 0;
-- code = 0;
-- while (IsHex(c)) {
-- nhex++;
-- code = code * 16 + UnHex(c);
-- if (code > rune_max) // checked after every digit, which also keeps code from growing without bound
-- goto BadEscape;
-- if (s->size() == 0)
-- goto BadEscape;
-- if (StringPieceToRune(&c, s, status) < 0)
-- return false;
-- }
-- if (c != '}' || nhex == 0)
-- goto BadEscape;
-- *rp = code;
-- return true;
-- }
-- // Easy case: two hex digits.
-- if (s->size() == 0)
-- goto BadEscape;
-- if (StringPieceToRune(&c1, s, status) < 0)
-- return false;
-- if (!IsHex(c) || !IsHex(c1))
-- goto BadEscape;
-- *rp = UnHex(c) * 16 + UnHex(c1);
-- return true;
--
-- // C escapes.
-- case 'n':
-- *rp = '\n';
-- return true;
-- case 'r':
-- *rp = '\r';
-- return true;
-- case 't':
-- *rp = '\t';
-- return true;
--
-- // Less common C escapes.
-- case 'a':
-- *rp = '\a';
-- return true;
-- case 'f':
-- *rp = '\f';
-- return true;
-- case 'v':
-- *rp = '\v';
-- return true;
--
-- // This code is disabled to avoid misparsing
-- // the Perl word-boundary \b as a backspace
-- // when in POSIX regexp mode. Surprisingly,
-- // in Perl, \b means word-boundary but [\b]
-- // means backspace. We don't support that:
-- // if you want a backspace embed a literal
-- // backspace character or use \x08.
-- //
-- // case 'b':
-- // *rp = '\b';
-- // return true;
-- }
--
-- LOG(DFATAL) << "Not reached in ParseEscape."; // every switch arm returns or jumps to BadEscape
--
--BadEscape:
-- // Unrecognized escape sequence.
-- status->set_code(kRegexpBadEscape);
-- status->set_error_arg(StringPiece(begin, s->data() - begin)); // everything consumed since the backslash
-- return false;
--}
--
--// Add a range to the character class, but exclude newline if asked
--// (ClassNL unset, or NeverNL set). Also handle case folding (FoldCase).
--void CharClassBuilder::AddRangeFlags(
-- Rune lo, Rune hi, Regexp::ParseFlags parse_flags) {
--
-- // Take out \n if the flags say so.
-- bool cutnl = !(parse_flags & Regexp::ClassNL) ||
-- (parse_flags & Regexp::NeverNL);
-- if (cutnl && lo <= '\n' && '\n' <= hi) {
-- if (lo < '\n') // re-add the part below \n ...
-- AddRangeFlags(lo, '\n' - 1, parse_flags);
-- if (hi > '\n') // ... and the part above it; neither half contains \n, so the recursion stops here
-- AddRangeFlags('\n' + 1, hi, parse_flags);
-- return;
-- }
--
-- // If folding case, add fold-equivalent characters too.
-- if (parse_flags & Regexp::FoldCase)
-- AddFoldedRange(this, lo, hi, 0);
-- else
-- AddRange(lo, hi);
--}
--
--// Look for a group with the given name among groups[0:ngroups]; returns NULL if there is none.
--static UGroup* LookupGroup(const StringPiece& name,
-- UGroup *groups, int ngroups) {
-- // Simple name lookup (linear scan, exact match).
-- for (int i = 0; i < ngroups; i++)
-- if (StringPiece(groups[i].name) == name)
-- return &groups[i];
-- return NULL;
--}
--
--// Fake UGroup containing all Runes: 0-65535 as one 16-bit range plus 65536-Runemax as one 32-bit range.
--static URange16 any16[] = { { 0, 65535 } };
--static URange32 any32[] = { { 65536, Runemax } };
--static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; // presumably {name, sign, r16, nr16, r32, nr32} -- confirm against UGroup's declaration
--
--// Look for a POSIX group with the given name (e.g., "[:^alpha:]") in posix_groups; NULL if unknown.
--static UGroup* LookupPosixGroup(const StringPiece& name) {
-- return LookupGroup(name, posix_groups, num_posix_groups);
--}
--
--static UGroup* LookupPerlGroup(const StringPiece& name) { // Look for a Perl group by name in perl_groups (callers pass the backslash and letter together); NULL if unknown.
-- return LookupGroup(name, perl_groups, num_perl_groups);
--}
--
--// Look for a Unicode group with the given name (e.g., "Han") in unicode_groups; NULL if unknown.
--static UGroup* LookupUnicodeGroup(const StringPiece& name) {
-- // Special case: "Any" means any.
-- if (name == StringPiece("Any"))
-- return &anygroup;
-- return LookupGroup(name, unicode_groups, num_unicode_groups);
--}
--
--// Add a UGroup (sign == +1) or its negation (any other sign) to the character class.
--static void AddUGroup(CharClassBuilder *cc, UGroup *g, int sign,
-- Regexp::ParseFlags parse_flags) {
-- if (sign == +1) {
-- for (int i = 0; i < g->nr16; i++) {
-- cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags);
-- }
-- for (int i = 0; i < g->nr32; i++) {
-- cc->AddRangeFlags(g->r32[i].lo, g->r32[i].hi, parse_flags);
-- }
-- } else {
-- if (parse_flags & Regexp::FoldCase) {
-- // Normally adding a case-folded group means
-- // adding all the extra fold-equivalent runes too.
-- // But if we're adding the negation of the group,
-- // we have to exclude all the runes that are fold-equivalent
-- // to what's already missing. Too hard, so do in two steps.
-- CharClassBuilder ccb1;
-- AddUGroup(&ccb1, g, +1, parse_flags);
-- ccb1.Negate();
-- cc->AddCharClass(&ccb1);
-- return;
-- }
-- int next = 0; // lowest rune not yet covered by a group range; the gaps below are what gets added
-- for (int i = 0; i < g->nr16; i++) {
-- if (next < g->r16[i].lo)
-- cc->AddRangeFlags(next, g->r16[i].lo - 1, parse_flags);
-- next = g->r16[i].hi + 1;
-- }
-- for (int i = 0; i < g->nr32; i++) {
-- if (next < g->r32[i].lo)
-- cc->AddRangeFlags(next, g->r32[i].lo - 1, parse_flags);
-- next = g->r32[i].hi + 1;
-- }
-- if (next <= Runemax)
-- cc->AddRangeFlags(next, Runemax, parse_flags);
-- }
--}
--
--// Maybe parse a Perl character class escape sequence.
--// Only recognizes the Perl character classes (\d \s \w \D \S \W),
--// not the Perl empty-string classes (\b \B \A \Z \z).
--// On success, sets *s to span the remainder of the string
--// and returns the corresponding UGroup; otherwise returns NULL (also when PerlClasses is off).
--// The StringPiece must *NOT* be edited unless the call succeeds.
--UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) {
-- if (!(parse_flags & Regexp::PerlClasses))
-- return NULL;
-- if (s->size() < 2 || (*s)[0] != '\\')
-- return NULL;
-- // Could use StringPieceToRune, but there aren't
-- // any non-ASCII Perl group names.
-- StringPiece name(s->begin(), 2); // the backslash plus the following byte
-- UGroup *g = LookupPerlGroup(name);
-- if (g == NULL)
-- return NULL;
-- s->remove_prefix(name.size());
-- return g;
--}
--
--enum ParseStatus { // three-way result of the "maybe parse" helpers below
-- kParseOk, // Did some parsing.
-- kParseError, // Found an error; status has been set.
-- kParseNothing, // Decided not to parse; input left untouched.
--};
--
--// Maybe parses a Unicode character group like \p{Han} or \P{Han}
--// (the latter is a negated group) and adds it to cc. Returns kParseNothing, leaving s untouched, when UnicodeGroups is off or s does not start with \p or \P.
--ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
-- CharClassBuilder *cc,
-- RegexpStatus* status) {
-- // Decide whether to parse.
-- if (!(parse_flags & Regexp::UnicodeGroups))
-- return kParseNothing;
-- if (s->size() < 2 || (*s)[0] != '\\')
-- return kParseNothing;
-- Rune c = (*s)[1];
-- if (c != 'p' && c != 'P')
-- return kParseNothing;
--
-- // Committed to parse. Results:
-- int sign = +1; // -1 = negated char class
-- if (c == 'P')
-- sign = -1;
-- StringPiece seq = *s; // \p{Han} or \pL
-- StringPiece name; // Han or L
-- s->remove_prefix(2); // '\\', 'p'
--
-- if (StringPieceToRune(&c, s, status) < 0) // failure is -1, never 0; status is already kRegexpBadUTF8
-- return kParseError;
-- if (c != '{') {
-- // Name is the bit of string we just skipped over for c.
-- const char* p = seq.begin() + 2;
-- name = StringPiece(p, s->begin() - p);
-- } else {
-- // Name is in braces. Look for closing }
-- int end = s->find('}', 0);
-- if (end == s->npos) {
-- if (!IsValidUTF8(seq, status))
-- return kParseError;
-- status->set_code(kRegexpBadCharRange);
-- status->set_error_arg(seq);
-- return kParseError;
-- }
-- name = StringPiece(s->begin(), end); // without '}'
-- s->remove_prefix(end + 1); // with '}'
-- if (!IsValidUTF8(name, status))
-- return kParseError;
-- }
--
-- // Chop seq where s now begins.
-- seq = StringPiece(seq.begin(), s->begin() - seq.begin());
--
-- // Look up group
-- if (name.size() > 0 && name[0] == '^') { // a leading ^ in the name flips the sign, so \P{^Han} is positive again
-- sign = -sign;
-- name.remove_prefix(1); // '^'
-- }
-- UGroup *g = LookupUnicodeGroup(name);
-- if (g == NULL) {
-- status->set_code(kRegexpBadCharRange);
-- status->set_error_arg(seq);
-- return kParseError;
-- }
--
-- AddUGroup(cc, g, sign, parse_flags);
-- return kParseOk;
--}
--
--// Parses a character class name like [:alnum:].
--// Sets *s to span the remainder of the string.
--// Adds the ranges corresponding to the class to cc. Returns kParseNothing (s untouched) if s does not hold a complete [: ... :] form.
--static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
-- CharClassBuilder *cc,
-- RegexpStatus* status) {
-- // Check begins with [:
-- const char* p = s->data();
-- const char* ep = s->data() + s->size();
-- if (ep - p < 2 || p[0] != '[' || p[1] != ':')
-- return kParseNothing;
--
-- // Look for closing :].
-- const char* q;
-- for (q = p+2; q <= ep-2 && (*q != ':' || *(q+1) != ']'); q++)
-- ;
--
-- // If no closing :], then ignore.
-- if (q > ep-2)
-- return kParseNothing;
--
-- // Got it. Check that it's valid.
-- q += 2;
-- StringPiece name(p, q-p); // the whole bracketed form, including [: and :]
--
-- UGroup *g = LookupPosixGroup(name);
-- if (g == NULL) {
-- status->set_code(kRegexpBadCharRange);
-- status->set_error_arg(name);
-- return kParseError;
-- }
--
-- s->remove_prefix(name.size());
-- AddUGroup(cc, g, g->sign, parse_flags);
-- return kParseOk;
--}
--
--// Parses a character inside a character class.
--// There are fewer special characters here than in the rest of the regexp.
--// Sets *s to span the remainder of the string.
--// Sets *rp to the character. Returns false with status set on failure (kRegexpMissingBracket if s is empty).
--bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
-- const StringPiece& whole_class,
-- RegexpStatus* status) {
-- if (s->size() == 0) {
-- status->set_code(kRegexpMissingBracket);
-- status->set_error_arg(whole_class);
-- return false;
-- }
--
-- // Allow regular escape sequences even though
-- // many need not be escaped in this context.
-- if (s->size() >= 1 && (*s)[0] == '\\')
-- return ParseEscape(s, rp, status, rune_max_);
--
-- // Otherwise take the next rune.
-- return StringPieceToRune(rp, s, status) >= 0;
--}
--
--// Parses a character class character, or, if the character
--// is followed by a hyphen, parses a character class range.
--// For single characters, rr->lo == rr->hi.
--// Sets *s to span the remainder of the string.
--// Sets *rr to the range. Returns false with status set on failure (kRegexpBadCharRange if hi < lo).
--bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
-- const StringPiece& whole_class,
-- RegexpStatus* status) {
-- StringPiece os = *s; // start of the range, kept for the error text
-- if (!ParseCCCharacter(s, &rr->lo, whole_class, status))
-- return false;
-- // [a-] means (a|-), so check for final ].
-- if (s->size() >= 2 && (*s)[0] == '-' && (*s)[1] != ']') {
-- s->remove_prefix(1); // '-'
-- if (!ParseCCCharacter(s, &rr->hi, whole_class, status))
-- return false;
-- if (rr->hi < rr->lo) {
-- status->set_code(kRegexpBadCharRange);
-- status->set_error_arg(StringPiece(os.data(), s->data() - os.data()));
-- return false;
-- }
-- } else {
-- rr->hi = rr->lo;
-- }
-- return true;
--}
--
--// Parses a possibly-negated character class expression like [^abx-z[:digit:]].
--// Sets *s to span the remainder of the string.
--// On success sets *out_re to the regexp for the class; on failure sets status, releases the partial regexp and returns false.
--bool Regexp::ParseState::ParseCharClass(StringPiece* s,
-- Regexp** out_re,
-- RegexpStatus* status) {
-- StringPiece whole_class = *s;
-- if (s->size() == 0 || (*s)[0] != '[') {
-- // Caller checked this.
-- status->set_code(kRegexpInternalError);
-- status->set_error_arg(NULL);
-- return false;
-- }
-- bool negated = false;
-- Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
-- re->ccb_ = new CharClassBuilder;
-- s->remove_prefix(1); // '['
-- if (s->size() > 0 && (*s)[0] == '^') {
-- s->remove_prefix(1); // '^'
-- negated = true;
-- if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
-- // If NL can't match implicitly, then pretend
-- // negated classes include a leading \n.
-- re->ccb_->AddRange('\n', '\n');
-- }
-- }
-- bool first = true; // ] is okay as first char in class
-- while (s->size() > 0 && ((*s)[0] != ']' || first)) {
-- // - is only okay unescaped as first or last in class.
-- // Except that Perl allows - anywhere.
-- if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
-- (s->size() == 1 || (*s)[1] != ']')) {
-- StringPiece t = *s;
-- t.remove_prefix(1); // '-'
-- Rune r;
-- int n = StringPieceToRune(&r, &t, status); // decoded only to size the error text: the '-' plus the rune after it
-- if (n < 0) {
-- re->Decref();
-- return false;
-- }
-- status->set_code(kRegexpBadCharRange);
-- status->set_error_arg(StringPiece(s->data(), 1+n));
-- re->Decref();
-- return false;
-- }
-- first = false;
--
-- // Look for [:alnum:] etc.
-- if (s->size() > 2 && (*s)[0] == '[' && (*s)[1] == ':') {
-- switch (ParseCCName(s, flags_, re->ccb_, status)) {
-- case kParseOk:
-- continue;
-- case kParseError:
-- re->Decref();
-- return false;
-- case kParseNothing:
-- break;
-- }
-- }
--
-- // Look for Unicode character group like \p{Han}
-- if (s->size() > 2 &&
-- (*s)[0] == '\\' &&
-- ((*s)[1] == 'p' || (*s)[1] == 'P')) {
-- switch (ParseUnicodeGroup(s, flags_, re->ccb_, status)) {
-- case kParseOk:
-- continue;
-- case kParseError:
-- re->Decref();
-- return false;
-- case kParseNothing:
-- break;
-- }
-- }
--
-- // Look for Perl character class symbols (extension).
-- UGroup *g = MaybeParsePerlCCEscape(s, flags_);
-- if (g != NULL) {
-- AddUGroup(re->ccb_, g, g->sign, flags_);
-- continue;
-- }
--
-- // Otherwise assume single character or simple range.
-- RuneRange rr;
-- if (!ParseCCRange(s, &rr, whole_class, status)) {
-- re->Decref();
-- return false;
-- }
-- // AddRangeFlags is usually called in response to a class like
-- // \p{Foo} or [[:foo:]]; for those, it filters \n out unless
-- // Regexp::ClassNL is set. In an explicit range or singleton
-- // like we just parsed, we do not filter \n out, so set ClassNL
-- // in the flags.
-- re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
-- }
-- if (s->size() == 0) { // input ended before the closing ]
-- status->set_code(kRegexpMissingBracket);
-- status->set_error_arg(whole_class);
-- re->Decref();
-- return false;
-- }
-- s->remove_prefix(1); // ']'
--
-- if (negated)
-- re->ccb_->Negate();
-- re->ccb_->RemoveAbove(rune_max_);
--
-- *out_re = re;
-- return true;
--}
--
--// Is this a valid capture name? [A-Za-z0-9_]+
--// PCRE limits names to 32 bytes.
--// Python rejects names starting with digits.
--// We don't enforce either of those.
--static bool IsValidCaptureName(const StringPiece& name) {
-- if (name.size() == 0)
-- return false;
-- for (int i = 0; i < name.size(); i++) {
-- int c = name[i];
-- if (('0' <= c && c <= '9') ||
-- ('a' <= c && c <= 'z') ||
-- ('A' <= c && c <= 'Z') ||
-- c == '_')
-- continue;
-- return false;
-- }
-- return true;
--}
--
--// Parses a Perl flag setting or non-capturing group or both,
--// like (?i) or (?: or (?i:. Removes from s, updates parse state.
--// The caller must check that s begins with "(?".
--// Returns true on success. If the Perl flag is not
--// well-formed or not supported, sets status_ and returns false.
--bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
-- StringPiece t = *s;
--
-- // Caller is supposed to check this.
-- if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
-- LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
-- status_->set_code(kRegexpInternalError);
-- return false;
-- }
--
-- t.remove_prefix(2); // "(?"
--
-- // Check for named captures, first introduced in Python's regexp library.
-- // As usual, there are three slightly different syntaxes:
-- //
-- // (?P<name>expr) the original, introduced by Python
-- // (?<name>expr) the .NET alteration, adopted by Perl 5.10
-- // (?'name'expr) another .NET alteration, adopted by Perl 5.10
-- //
-- // Perl 5.10 gave in and implemented the Python version too,
-- // but they claim that the last two are the preferred forms.
-- // PCRE and languages based on it (specifically, PHP and Ruby)
-- // support all three as well. EcmaScript 4 uses only the Python form.
-- //
-- // In both the open source world (via Code Search) and the
-- // Google source tree, (?P<expr>name) is the dominant form,
-- // so that's the one we implement. One is enough.
-- if (t.size() > 2 && t[0] == 'P' && t[1] == '<') {
-- // Pull out name.
-- int end = t.find('>', 2);
-- if (end == t.npos) {
-- if (!IsValidUTF8(*s, status_))
-- return false;
-- status_->set_code(kRegexpBadNamedCapture);
-- status_->set_error_arg(*s);
-- return false;
-- }
--
-- // t is "P<name>...", t[end] == '>'
-- StringPiece capture(t.begin()-2, end+3); // "(?P<name>"
-- StringPiece name(t.begin()+2, end-2); // "name"
-- if (!IsValidUTF8(name, status_))
-- return false;
-- if (!IsValidCaptureName(name)) {
-- status_->set_code(kRegexpBadNamedCapture);
-- status_->set_error_arg(capture);
-- return false;
-- }
--
-- if (!DoLeftParen(name)) {
-- // DoLeftParen's failure set status_.
-- return false;
-- }
--
-- s->remove_prefix(capture.end() - s->begin());
-- return true;
-- }
--
-- bool negated = false;
-- bool sawflags = false;
-- int nflags = flags_;
-- Rune c;
-- for (bool done = false; !done; ) {
-- if (t.size() == 0)
-- goto BadPerlOp;
-- if (StringPieceToRune(&c, &t, status_) < 0)
-- return false;
-- switch (c) {
-- default:
-- goto BadPerlOp;
--
-- // Parse flags.
-- case 'i':
-- sawflags = true;
-- if (negated)
-- nflags &= ~FoldCase;
-- else
-- nflags |= FoldCase;
-- break;
--
-- case 'm': // opposite of our OneLine
-- sawflags = true;
-- if (negated)
-- nflags |= OneLine;
-- else
-- nflags &= ~OneLine;
-- break;
--
-- case 's':
-- sawflags = true;
-- if (negated)
-- nflags &= ~DotNL;
-- else
-- nflags |= DotNL;
-- break;
--
-- case 'U':
-- sawflags = true;
-- if (negated)
-- nflags &= ~NonGreedy;
-- else
-- nflags |= NonGreedy;
-- break;
--
-- // Negation
-- case '-':
-- if (negated)
-- goto BadPerlOp;
-- negated = true;
-- sawflags = false;
-- break;
--
-- // Open new group.
-- case ':':
-- if (!DoLeftParenNoCapture()) {
-- // DoLeftParenNoCapture's failure set status_.
-- return false;
-- }
-- done = true;
-- break;
--
-- // Finish flags.
-- case ')':
-- done = true;
-- break;
-- }
-- }
--
-- if (negated && !sawflags)
-- goto BadPerlOp;
--
-- flags_ = static_cast<Regexp::ParseFlags>(nflags);
-- *s = t;
-- return true;
--
--BadPerlOp:
-- status_->set_code(kRegexpBadPerlOp);
-- status_->set_error_arg(StringPiece(s->begin(), t.begin() - s->begin()));
-- return false;
--}
--
--// Converts latin1 (assumed to be encoded as Latin1 bytes)
--// into UTF8 encoding in string.
--// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is
--// deprecated and because it rejects code points 0x80-0x9F.
--void ConvertLatin1ToUTF8(const StringPiece& latin1, string* utf) {
-- char buf[UTFmax];
--
-- utf->clear();
-- for (int i = 0; i < latin1.size(); i++) {
-- Rune r = latin1[i] & 0xFF;
-- int n = runetochar(buf, &r);
-- utf->append(buf, n);
-- }
--}
--
--// Parses the regular expression given by s,
--// returning the corresponding Regexp tree.
--// The caller must Decref the return value when done with it.
--// Returns NULL on error.
--Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
-- RegexpStatus* status) {
-- // Make status non-NULL (easier on everyone else).
-- RegexpStatus xstatus;
-- if (status == NULL)
-- status = &xstatus;
--
-- ParseState ps(global_flags, s, status);
-- StringPiece t = s;
--
-- // Convert regexp to UTF-8 (easier on the rest of the parser).
-- if (global_flags & Latin1) {
-- string* tmp = new string;
-- ConvertLatin1ToUTF8(t, tmp);
-- status->set_tmp(tmp);
-- t = *tmp;
-- }
--
-- if (global_flags & Literal) {
-- // Special parse loop for literal string.
-- while (t.size() > 0) {
-- Rune r;
-- if (StringPieceToRune(&r, &t, status) < 0)
-- return NULL;
-- if (!ps.PushLiteral(r))
-- return NULL;
-- }
-- return ps.DoFinish();
-- }
--
-- StringPiece lastunary = NULL;
-- while (t.size() > 0) {
-- StringPiece isunary = NULL;
-- switch (t[0]) {
-- default: {
-- Rune r;
-- if (StringPieceToRune(&r, &t, status) < 0)
-- return NULL;
-- if (!ps.PushLiteral(r))
-- return NULL;
-- break;
-- }
--
-- case '(':
-- // "(?" introduces Perl escape.
-- if ((ps.flags() & PerlX) && (t.size() >= 2 && t[1] == '?')) {
-- // Flag changes and non-capturing groups.
-- if (!ps.ParsePerlFlags(&t))
-- return NULL;
-- break;
-- }
-- if (!ps.DoLeftParen(NULL))
-- return NULL;
-- t.remove_prefix(1); // '('
-- break;
--
-- case '|':
-- if (!ps.DoVerticalBar())
-- return NULL;
-- t.remove_prefix(1); // '|'
-- break;
--
-- case ')':
-- if (!ps.DoRightParen())
-- return NULL;
-- t.remove_prefix(1); // ')'
-- break;
--
-- case '^': // Beginning of line.
-- if (!ps.PushCarat())
-- return NULL;
-- t.remove_prefix(1); // '^'
-- break;
--
-- case '$': // End of line.
-- if (!ps.PushDollar())
-- return NULL;
-- t.remove_prefix(1); // '$'
-- break;
--
-- case '.': // Any character (possibly except newline).
-- if (!ps.PushDot())
-- return NULL;
-- t.remove_prefix(1); // '.'
-- break;
--
-- case '[': { // Character class.
-- Regexp* re;
-- if (!ps.ParseCharClass(&t, &re, status))
-- return NULL;
-- if (!ps.PushRegexp(re))
-- return NULL;
-- break;
-- }
--
-- case '*': { // Zero or more.
-- RegexpOp op;
-- op = kRegexpStar;
-- goto Rep;
-- case '+': // One or more.
-- op = kRegexpPlus;
-- goto Rep;
-- case '?': // Zero or one.
-- op = kRegexpQuest;
-- goto Rep;
-- Rep:
-- StringPiece opstr = t;
-- bool nongreedy = false;
-- t.remove_prefix(1); // '*' or '+' or '?'
-- if (ps.flags() & PerlX) {
-- if (t.size() > 0 && t[0] == '?') {
-- nongreedy = true;
-- t.remove_prefix(1); // '?'
-- }
-- if (lastunary.size() > 0) {
-- // In Perl it is not allowed to stack repetition operators:
-- // a** is a syntax error, not a double-star.
-- // (and a++ means something else entirely, which we don't support!)
-- status->set_code(kRegexpRepeatOp);
-- status->set_error_arg(StringPiece(lastunary.begin(),
-- t.begin() - lastunary.begin()));
-- return NULL;
-- }
-- }
-- opstr.set(opstr.data(), t.data() - opstr.data());
-- if (!ps.PushRepeatOp(op, opstr, nongreedy))
-- return NULL;
-- isunary = opstr;
-- break;
-- }
--
-- case '{': { // Counted repetition.
-- int lo, hi;
-- StringPiece opstr = t;
-- if (!MaybeParseRepetition(&t, &lo, &hi)) {
-- // Treat like a literal.
-- if (!ps.PushLiteral('{'))
-- return NULL;
-- t.remove_prefix(1); // '{'
-- break;
-- }
-- bool nongreedy = false;
-- if (ps.flags() & PerlX) {
-- if (t.size() > 0 && t[0] == '?') {
-- nongreedy = true;
-- t.remove_prefix(1); // '?'
-- }
-- if (lastunary.size() > 0) {
-- // Not allowed to stack repetition operators.
-- status->set_code(kRegexpRepeatOp);
-- status->set_error_arg(StringPiece(lastunary.begin(),
-- t.begin() - lastunary.begin()));
-- return NULL;
-- }
-- }
-- opstr.set(opstr.data(), t.data() - opstr.data());
-- if (!ps.PushRepetition(lo, hi, opstr, nongreedy))
-- return NULL;
-- isunary = opstr;
-- break;
-- }
--
-- case '\\': { // Escaped character or Perl sequence.
-- // \b and \B: word boundary or not
-- if ((ps.flags() & Regexp::PerlB) &&
-- t.size() >= 2 && (t[1] == 'b' || t[1] == 'B')) {
-- if (!ps.PushWordBoundary(t[1] == 'b'))
-- return NULL;
-- t.remove_prefix(2); // '\\', 'b'
-- break;
-- }
--
-- if ((ps.flags() & Regexp::PerlX) && t.size() >= 2) {
-- if (t[1] == 'A') {
-- if (!ps.PushSimpleOp(kRegexpBeginText))
-- return NULL;
-- t.remove_prefix(2); // '\\', 'A'
-- break;
-- }
-- if (t[1] == 'z') {
-- if (!ps.PushSimpleOp(kRegexpEndText))
-- return NULL;
-- t.remove_prefix(2); // '\\', 'z'
-- break;
-- }
-- // Do not recognize \Z, because this library can't
-- // implement the exact Perl/PCRE semantics.
-- // (This library treats "(?-m)$" as \z, even though
-- // in Perl and PCRE it is equivalent to \Z.)
--
-- if (t[1] == 'C') { // \C: any byte [sic]
-- if (!ps.PushSimpleOp(kRegexpAnyByte))
-- return NULL;
-- t.remove_prefix(2); // '\\', 'C'
-- break;
-- }
--
-- if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
-- t.remove_prefix(2); // '\\', 'Q'
-- while (t.size() > 0) {
-- if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
-- t.remove_prefix(2); // '\\', 'E'
-- break;
-- }
-- Rune r;
-- if (StringPieceToRune(&r, &t, status) < 0)
-- return NULL;
-- if (!ps.PushLiteral(r))
-- return NULL;
-- }
-- break;
-- }
-- }
--
-- if (t.size() >= 2 && (t[1] == 'p' || t[1] == 'P')) {
-- Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
-- re->ccb_ = new CharClassBuilder;
-- switch (ParseUnicodeGroup(&t, ps.flags(), re->ccb_, status)) {
-- case kParseOk:
-- if (!ps.PushRegexp(re))
-- return NULL;
-- goto Break2;
-- case kParseError:
-- re->Decref();
-- return NULL;
-- case kParseNothing:
-- re->Decref();
-- break;
-- }
-- }
--
-- UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());
-- if (g != NULL) {
-- Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
-- re->ccb_ = new CharClassBuilder;
-- AddUGroup(re->ccb_, g, g->sign, ps.flags());
-- if (!ps.PushRegexp(re))
-- return NULL;
-- break;
-- }
--
-- Rune r;
-- if (!ParseEscape(&t, &r, status, ps.rune_max()))
-- return NULL;
-- if (!ps.PushLiteral(r))
-- return NULL;
-- break;
-- }
-- }
-- Break2:
-- lastunary = isunary;
-- }
-- return ps.DoFinish();
--}
--
--} // namespace re2
-diff --git a/re2/re2/perl_groups.cc b/re2/re2/perl_groups.cc
-deleted file mode 100644
-index 1af5b43..0000000
---- a/re2/re2/perl_groups.cc
-+++ /dev/null
-@@ -1,119 +0,0 @@
--// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
--// make_perl_groups.pl >perl_groups.cc
--
--#include "re2/unicode_groups.h"
--
--namespace re2 {
--
--static URange16 code1[] = { /* \d */
-- { 0x30, 0x39 },
--};
--static URange16 code2[] = { /* \s */
-- { 0x9, 0xa },
-- { 0xc, 0xd },
-- { 0x20, 0x20 },
--};
--static URange16 code3[] = { /* \w */
-- { 0x30, 0x39 },
-- { 0x41, 0x5a },
-- { 0x5f, 0x5f },
-- { 0x61, 0x7a },
--};
--UGroup perl_groups[] = {
-- { "\\d", +1, code1, 1 },
-- { "\\D", -1, code1, 1 },
-- { "\\s", +1, code2, 3 },
-- { "\\S", -1, code2, 3 },
-- { "\\w", +1, code3, 4 },
-- { "\\W", -1, code3, 4 },
--};
--int num_perl_groups = 6;
--static URange16 code4[] = { /* [:alnum:] */
-- { 0x30, 0x39 },
-- { 0x41, 0x5a },
-- { 0x61, 0x7a },
--};
--static URange16 code5[] = { /* [:alpha:] */
-- { 0x41, 0x5a },
-- { 0x61, 0x7a },
--};
--static URange16 code6[] = { /* [:ascii:] */
-- { 0x0, 0x7f },
--};
--static URange16 code7[] = { /* [:blank:] */
-- { 0x9, 0x9 },
-- { 0x20, 0x20 },
--};
--static URange16 code8[] = { /* [:cntrl:] */
-- { 0x0, 0x1f },
-- { 0x7f, 0x7f },
--};
--static URange16 code9[] = { /* [:digit:] */
-- { 0x30, 0x39 },
--};
--static URange16 code10[] = { /* [:graph:] */
-- { 0x21, 0x7e },
--};
--static URange16 code11[] = { /* [:lower:] */
-- { 0x61, 0x7a },
--};
--static URange16 code12[] = { /* [:print:] */
-- { 0x20, 0x7e },
--};
--static URange16 code13[] = { /* [:punct:] */
-- { 0x21, 0x2f },
-- { 0x3a, 0x40 },
-- { 0x5b, 0x60 },
-- { 0x7b, 0x7e },
--};
--static URange16 code14[] = { /* [:space:] */
-- { 0x9, 0xd },
-- { 0x20, 0x20 },
--};
--static URange16 code15[] = { /* [:upper:] */
-- { 0x41, 0x5a },
--};
--static URange16 code16[] = { /* [:word:] */
-- { 0x30, 0x39 },
-- { 0x41, 0x5a },
-- { 0x5f, 0x5f },
-- { 0x61, 0x7a },
--};
--static URange16 code17[] = { /* [:xdigit:] */
-- { 0x30, 0x39 },
-- { 0x41, 0x46 },
-- { 0x61, 0x66 },
--};
--UGroup posix_groups[] = {
-- { "[:alnum:]", +1, code4, 3 },
-- { "[:^alnum:]", -1, code4, 3 },
-- { "[:alpha:]", +1, code5, 2 },
-- { "[:^alpha:]", -1, code5, 2 },
-- { "[:ascii:]", +1, code6, 1 },
-- { "[:^ascii:]", -1, code6, 1 },
-- { "[:blank:]", +1, code7, 2 },
-- { "[:^blank:]", -1, code7, 2 },
-- { "[:cntrl:]", +1, code8, 2 },
-- { "[:^cntrl:]", -1, code8, 2 },
-- { "[:digit:]", +1, code9, 1 },
-- { "[:^digit:]", -1, code9, 1 },
-- { "[:graph:]", +1, code10, 1 },
-- { "[:^graph:]", -1, code10, 1 },
-- { "[:lower:]", +1, code11, 1 },
-- { "[:^lower:]", -1, code11, 1 },
-- { "[:print:]", +1, code12, 1 },
-- { "[:^print:]", -1, code12, 1 },
-- { "[:punct:]", +1, code13, 4 },
-- { "[:^punct:]", -1, code13, 4 },
-- { "[:space:]", +1, code14, 2 },
-- { "[:^space:]", -1, code14, 2 },
-- { "[:upper:]", +1, code15, 1 },
-- { "[:^upper:]", -1, code15, 1 },
-- { "[:word:]", +1, code16, 4 },
-- { "[:^word:]", -1, code16, 4 },
-- { "[:xdigit:]", +1, code17, 3 },
-- { "[:^xdigit:]", -1, code17, 3 },
--};
--int num_posix_groups = 28;
--
--} // namespace re2
-diff --git a/re2/re2/prefilter.cc b/re2/re2/prefilter.cc
-deleted file mode 100644
-index 30e4c01..0000000
---- a/re2/re2/prefilter.cc
-+++ /dev/null
-@@ -1,671 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--#include "re2/prefilter.h"
--#include "re2/re2.h"
--#include "re2/unicode_casefold.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--static const int Trace = false;
--
--typedef set<string>::iterator SSIter;
--typedef set<string>::const_iterator ConstSSIter;
--
--static int alloc_id = 100000; // Used for debugging.
--// Initializes a Prefilter, allocating subs_ as necessary.
--Prefilter::Prefilter(Op op) {
-- op_ = op;
-- subs_ = NULL;
-- if (op_ == AND || op_ == OR)
-- subs_ = new vector<Prefilter*>;
--
-- alloc_id_ = alloc_id++;
-- VLOG(10) << "alloc_id: " << alloc_id_;
--}
--
--// Destroys a Prefilter.
--Prefilter::~Prefilter() {
-- VLOG(10) << "Deleted: " << alloc_id_;
-- if (subs_) {
-- for (int i = 0; i < subs_->size(); i++)
-- delete (*subs_)[i];
-- delete subs_;
-- subs_ = NULL;
-- }
--}
--
--// Simplify if the node is an empty Or or And.
--Prefilter* Prefilter::Simplify() {
-- if (op_ != AND && op_ != OR) {
-- return this;
-- }
--
-- // Nothing left in the AND/OR.
-- if (subs_->size() == 0) {
-- if (op_ == AND)
-- op_ = ALL; // AND of nothing is true
-- else
-- op_ = NONE; // OR of nothing is false
--
-- return this;
-- }
--
-- // Just one subnode: throw away wrapper.
-- if (subs_->size() == 1) {
-- Prefilter* a = (*subs_)[0];
-- subs_->clear();
-- delete this;
-- return a->Simplify();
-- }
--
-- return this;
--}
--
--// Combines two Prefilters together to create an "op" (AND or OR).
--// The passed Prefilters will be part of the returned Prefilter or deleted.
--// Does lots of work to avoid creating unnecessarily complicated structures.
--Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
-- // If a, b can be rewritten as op, do so.
-- a = a->Simplify();
-- b = b->Simplify();
--
-- // Canonicalize: a->op <= b->op.
-- if (a->op() > b->op()) {
-- Prefilter* t = a;
-- a = b;
-- b = t;
-- }
--
-- // Trivial cases.
-- // ALL AND b = b
-- // NONE OR b = b
-- // ALL OR b = ALL
-- // NONE AND b = NONE
-- // Don't need to look at b, because of canonicalization above.
-- // ALL and NONE are smallest opcodes.
-- if (a->op() == ALL || a->op() == NONE) {
-- if ((a->op() == ALL && op == AND) ||
-- (a->op() == NONE && op == OR)) {
-- delete a;
-- return b;
-- } else {
-- delete b;
-- return a;
-- }
-- }
--
-- // If a and b match op, merge their contents.
-- if (a->op() == op && b->op() == op) {
-- for (int i = 0; i < b->subs()->size(); i++) {
-- Prefilter* bb = (*b->subs())[i];
-- a->subs()->push_back(bb);
-- }
-- b->subs()->clear();
-- delete b;
-- return a;
-- }
--
-- // If a already has the same op as the op that is under construction
-- // add in b (similarly if b already has the same op, add in a).
-- if (b->op() == op) {
-- Prefilter* t = a;
-- a = b;
-- b = t;
-- }
-- if (a->op() == op) {
-- a->subs()->push_back(b);
-- return a;
-- }
--
-- // Otherwise just return the op.
-- Prefilter* c = new Prefilter(op);
-- c->subs()->push_back(a);
-- c->subs()->push_back(b);
-- return c;
--}
--
--Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
-- return AndOr(AND, a, b);
--}
--
--Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
-- return AndOr(OR, a, b);
--}
--
--static void SimplifyStringSet(set<string> *ss) {
-- // Now make sure that the strings aren't redundant. For example, if
-- // we know "ab" is a required string, then it doesn't help at all to
-- // know that "abc" is also a required string, so delete "abc". This
-- // is because, when we are performing a string search to filter
-- // regexps, matching ab will already allow this regexp to be a
-- // candidate for match, so further matching abc is redundant.
--
-- for (SSIter i = ss->begin(); i != ss->end(); ++i) {
-- SSIter j = i;
-- ++j;
-- while (j != ss->end()) {
-- // Increment j early so that we can erase the element it points to.
-- SSIter old_j = j;
-- ++j;
-- if (old_j->find(*i) != string::npos)
-- ss->erase(old_j);
-- }
-- }
--}
--
--Prefilter* Prefilter::OrStrings(set<string>* ss) {
-- SimplifyStringSet(ss);
-- Prefilter* or_prefilter = NULL;
-- if (!ss->empty()) {
-- or_prefilter = new Prefilter(NONE);
-- for (SSIter i = ss->begin(); i != ss->end(); ++i)
-- or_prefilter = Or(or_prefilter, FromString(*i));
-- }
-- return or_prefilter;
--}
--
--static Rune ToLowerRune(Rune r) {
-- if (r < Runeself) {
-- if ('A' <= r && r <= 'Z')
-- r += 'a' - 'A';
-- return r;
-- }
--
-- CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
-- if (f == NULL || r < f->lo)
-- return r;
-- return ApplyFold(f, r);
--}
--
--Prefilter* Prefilter::FromString(const string& str) {
-- Prefilter* m = new Prefilter(Prefilter::ATOM);
-- m->atom_ = str;
-- return m;
--}
--
--// Information about a regexp used during computation of Prefilter.
--// Can be thought of as information about the set of strings matching
--// the given regular expression.
--class Prefilter::Info {
-- public:
-- Info();
-- ~Info();
--
-- // More constructors. They delete their Info* arguments.
-- static Info* Alt(Info* a, Info* b);
-- static Info* Concat(Info* a, Info* b);
-- static Info* And(Info* a, Info* b);
-- static Info* Star(Info* a);
-- static Info* Plus(Info* a);
-- static Info* Quest(Info* a);
-- static Info* EmptyString();
-- static Info* NoMatch();
-- static Info* AnyChar();
-- static Info* CClass(CharClass* cc);
-- static Info* Literal(Rune r);
-- static Info* AnyMatch();
--
-- // Format Info as a string.
-- string ToString();
--
-- // Caller takes ownership of the Prefilter.
-- Prefilter* TakeMatch();
--
-- set<string>& exact() { return exact_; }
--
-- bool is_exact() const { return is_exact_; }
--
-- class Walker;
--
-- private:
-- set<string> exact_;
--
-- // When is_exact_ is true, the strings that match
-- // are placed in exact_. When it is no longer an exact
-- // set of strings that match this RE, then is_exact_
-- // is false and the match_ contains the required match
-- // criteria.
-- bool is_exact_;
--
-- // Accumulated Prefilter query that any
-- // match for this regexp is guaranteed to match.
-- Prefilter* match_;
--};
--
--
--Prefilter::Info::Info()
-- : is_exact_(false),
-- match_(NULL) {
--}
--
--Prefilter::Info::~Info() {
-- delete match_;
--}
--
--Prefilter* Prefilter::Info::TakeMatch() {
-- if (is_exact_) {
-- match_ = Prefilter::OrStrings(&exact_);
-- is_exact_ = false;
-- }
-- Prefilter* m = match_;
-- match_ = NULL;
-- return m;
--}
--
--// Format a Info in string form.
--string Prefilter::Info::ToString() {
-- if (this == NULL) {
-- // Sometimes when iterating on children of a node,
-- // some children might have NULL Info. Adding
-- // the check here for NULL to take care of cases where
-- // the caller is not checking.
-- return "";
-- }
--
-- if (is_exact_) {
-- int n = 0;
-- string s;
-- for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {
-- if (n++ > 0)
-- s += ",";
-- s += *i;
-- }
-- return s;
-- }
--
-- if (match_)
-- return match_->DebugString();
--
-- return "";
--}
--
--// Add the strings from src to dst.
--static void CopyIn(const set<string>& src, set<string>* dst) {
-- for (ConstSSIter i = src.begin(); i != src.end(); ++i)
-- dst->insert(*i);
--}
--
--// Add the cross-product of a and b to dst.
--// (For each string i in a and j in b, add i+j.)
--static void CrossProduct(const set<string>& a,
-- const set<string>& b,
-- set<string>* dst) {
-- for (ConstSSIter i = a.begin(); i != a.end(); ++i)
-- for (ConstSSIter j = b.begin(); j != b.end(); ++j)
-- dst->insert(*i + *j);
--}
--
--// Concats a and b. Requires that both are exact sets.
--// Forms an exact set that is a crossproduct of a and b.
--Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
-- if (a == NULL)
-- return b;
-- DCHECK(a->is_exact_);
-- DCHECK(b && b->is_exact_);
-- Info *ab = new Info();
--
-- CrossProduct(a->exact_, b->exact_, &ab->exact_);
-- ab->is_exact_ = true;
--
-- delete a;
-- delete b;
-- return ab;
--}
--
--// Constructs an inexact Info for ab given a and b.
--// Used only when a or b is not exact or when the
--// exact cross product is likely to be too big.
--Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
-- if (a == NULL)
-- return b;
-- if (b == NULL)
-- return a;
--
-- Info *ab = new Info();
--
-- ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
-- ab->is_exact_ = false;
-- delete a;
-- delete b;
-- return ab;
--}
--
--// Constructs Info for a|b given a and b.
--Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
-- Info *ab = new Info();
--
-- if (a->is_exact_ && b->is_exact_) {
-- CopyIn(a->exact_, &ab->exact_);
-- CopyIn(b->exact_, &ab->exact_);
-- ab->is_exact_ = true;
-- } else {
-- // Either a or b has is_exact_ = false. If the other
-- // one has is_exact_ = true, we move it to match_ and
-- // then create a OR of a,b. The resulting Info has
-- // is_exact_ = false.
-- ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
-- ab->is_exact_ = false;
-- }
--
-- delete a;
-- delete b;
-- return ab;
--}
--
--// Constructs Info for a? given a.
--Prefilter::Info* Prefilter::Info::Quest(Info *a) {
-- Info *ab = new Info();
--
-- ab->is_exact_ = false;
-- ab->match_ = new Prefilter(ALL);
-- delete a;
-- return ab;
--}
--
--// Constructs Info for a* given a.
--// Same as a? -- not much to do.
--Prefilter::Info* Prefilter::Info::Star(Info *a) {
-- return Quest(a);
--}
--
--// Constructs Info for a+ given a. If a was exact set, it isn't
--// anymore.
--Prefilter::Info* Prefilter::Info::Plus(Info *a) {
-- Info *ab = new Info();
--
-- ab->match_ = a->TakeMatch();
-- ab->is_exact_ = false;
--
-- delete a;
-- return ab;
--}
--
--static string RuneToString(Rune r) {
-- char buf[UTFmax];
-- int n = runetochar(buf, &r);
-- return string(buf, n);
--}
--
--// Constructs Info for literal rune.
--Prefilter::Info* Prefilter::Info::Literal(Rune r) {
-- Info* info = new Info();
-- info->exact_.insert(RuneToString(ToLowerRune(r)));
-- info->is_exact_ = true;
-- return info;
--}
--
--// Constructs Info for dot (any character).
--Prefilter::Info* Prefilter::Info::AnyChar() {
-- Prefilter::Info* info = new Prefilter::Info();
-- info->match_ = new Prefilter(ALL);
-- return info;
--}
--
--// Constructs Prefilter::Info for no possible match.
--Prefilter::Info* Prefilter::Info::NoMatch() {
-- Prefilter::Info* info = new Prefilter::Info();
-- info->match_ = new Prefilter(NONE);
-- return info;
--}
--
--// Constructs Prefilter::Info for any possible match.
--// This Prefilter::Info is valid for any regular expression,
--// since it makes no assertions whatsoever about the
--// strings being matched.
--Prefilter::Info* Prefilter::Info::AnyMatch() {
-- Prefilter::Info *info = new Prefilter::Info();
-- info->match_ = new Prefilter(ALL);
-- return info;
--}
--
--// Constructs Prefilter::Info for just the empty string.
--Prefilter::Info* Prefilter::Info::EmptyString() {
-- Prefilter::Info* info = new Prefilter::Info();
-- info->is_exact_ = true;
-- info->exact_.insert("");
-- return info;
--}
--
--// Constructs Prefilter::Info for a character class.
--typedef CharClass::iterator CCIter;
--Prefilter::Info* Prefilter::Info::CClass(CharClass *cc) {
-- if (Trace) {
-- VLOG(0) << "CharClassInfo:";
-- for (CCIter i = cc->begin(); i != cc->end(); ++i)
-- VLOG(0) << " " << i->lo << "-" << i->hi;
-- }
--
-- // If the class is too large, it's okay to overestimate.
-- if (cc->size() > 10)
-- return AnyChar();
--
-- Prefilter::Info *a = new Prefilter::Info();
-- for (CCIter i = cc->begin(); i != cc->end(); ++i)
-- for (Rune r = i->lo; r <= i->hi; r++)
-- a->exact_.insert(RuneToString(ToLowerRune(r)));
--
-- a->is_exact_ = true;
--
-- if (Trace) {
-- VLOG(0) << " = " << a->ToString();
-- }
--
-- return a;
--}
--
--class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
-- public:
-- Walker() {}
--
-- virtual Info* PostVisit(
-- Regexp* re, Info* parent_arg,
-- Info* pre_arg,
-- Info** child_args, int nchild_args);
--
-- virtual Info* ShortVisit(
-- Regexp* re,
-- Info* parent_arg);
--
-- private:
-- DISALLOW_EVIL_CONSTRUCTORS(Walker);
--};
--
--Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
-- if (Trace) {
-- LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
-- }
-- Prefilter::Info::Walker w;
-- Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
--
-- if (w.stopped_early()) {
-- delete info;
-- return NULL;
-- }
--
-- return info;
--}
--
--Prefilter::Info* Prefilter::Info::Walker::ShortVisit(
-- Regexp* re, Prefilter::Info* parent_arg) {
-- return AnyMatch();
--}
--
--// Constructs the Prefilter::Info for the given regular expression.
--// Assumes re is simplified.
--Prefilter::Info* Prefilter::Info::Walker::PostVisit(
-- Regexp* re, Prefilter::Info* parent_arg,
-- Prefilter::Info* pre_arg, Prefilter::Info** child_args,
-- int nchild_args) {
-- Prefilter::Info *info;
-- switch (re->op()) {
-- default:
-- case kRegexpRepeat:
-- LOG(DFATAL) << "Bad regexp op " << re->op();
-- info = EmptyString();
-- break;
--
-- case kRegexpNoMatch:
-- info = NoMatch();
-- break;
--
-- // These ops match the empty string:
-- case kRegexpEmptyMatch: // anywhere
-- case kRegexpBeginLine: // at beginning of line
-- case kRegexpEndLine: // at end of line
-- case kRegexpBeginText: // at beginning of text
-- case kRegexpEndText: // at end of text
-- case kRegexpWordBoundary: // at word boundary
-- case kRegexpNoWordBoundary: // not at word boundary
-- info = EmptyString();
-- break;
--
-- case kRegexpLiteral:
-- info = Literal(re->rune());
-- break;
--
-- case kRegexpLiteralString:
-- if (re->nrunes() == 0) {
-- info = NoMatch();
-- break;
-- }
-- info = Literal(re->runes()[0]);
-- for (int i = 1; i < re->nrunes(); i++)
-- info = Concat(info, Literal(re->runes()[i]));
-- break;
--
-- case kRegexpConcat: {
-- // Accumulate in info.
-- // Exact is concat of recent contiguous exact nodes.
-- info = NULL;
-- Info* exact = NULL;
-- for (int i = 0; i < nchild_args; i++) {
-- Info* ci = child_args[i]; // child info
-- if (!ci->is_exact() ||
-- (exact && ci->exact().size() * exact->exact().size() > 16)) {
-- // Exact run is over.
-- info = And(info, exact);
-- exact = NULL;
-- // Add this child's info.
-- info = And(info, ci);
-- } else {
-- // Append to exact run.
-- exact = Concat(exact, ci);
-- }
-- }
-- info = And(info, exact);
-- }
-- break;
--
-- case kRegexpAlternate:
-- info = child_args[0];
-- for (int i = 1; i < nchild_args; i++)
-- info = Alt(info, child_args[i]);
-- VLOG(10) << "Alt: " << info->ToString();
-- break;
--
-- case kRegexpStar:
-- info = Star(child_args[0]);
-- break;
--
-- case kRegexpQuest:
-- info = Quest(child_args[0]);
-- break;
--
-- case kRegexpPlus:
-- info = Plus(child_args[0]);
-- break;
--
-- case kRegexpAnyChar:
-- // Claim nothing, except that it's not empty.
-- info = AnyChar();
-- break;
--
-- case kRegexpCharClass:
-- info = CClass(re->cc());
-- break;
--
-- case kRegexpCapture:
-- // These don't affect the set of matching strings.
-- info = child_args[0];
-- break;
-- }
--
-- if (Trace) {
-- VLOG(0) << "BuildInfo " << re->ToString()
-- << ": " << info->ToString();
-- }
--
-- return info;
--}
--
--
--Prefilter* Prefilter::FromRegexp(Regexp* re) {
-- if (re == NULL)
-- return NULL;
--
-- Regexp* simple = re->Simplify();
-- Prefilter::Info *info = BuildInfo(simple);
--
-- simple->Decref();
-- if (info == NULL)
-- return NULL;
--
-- Prefilter* m = info->TakeMatch();
--
-- delete info;
-- return m;
--}
--
--string Prefilter::DebugString() const {
-- if (this == NULL)
-- return "<nil>";
--
-- switch (op_) {
-- default:
-- LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
-- return StringPrintf("op%d", op_);
-- case NONE:
-- return "*no-matches*";
-- case ATOM:
-- return atom_;
-- case ALL:
-- return "";
-- case AND: {
-- string s = "";
-- for (int i = 0; i < subs_->size(); i++) {
-- if (i > 0)
-- s += " ";
-- s += (*subs_)[i]->DebugString();
-- }
-- return s;
-- }
-- case OR: {
-- string s = "(";
-- for (int i = 0; i < subs_->size(); i++) {
-- if (i > 0)
-- s += "|";
-- s += (*subs_)[i]->DebugString();
-- }
-- s += ")";
-- return s;
-- }
-- }
--}
--
--Prefilter* Prefilter::FromRE2(const RE2* re2) {
-- if (re2 == NULL)
-- return NULL;
--
-- Regexp* regexp = re2->Regexp();
-- if (regexp == NULL)
-- return NULL;
--
-- return FromRegexp(regexp);
--}
--
--
--} // namespace re2
-diff --git a/re2/re2/prefilter.h b/re2/re2/prefilter.h
-deleted file mode 100644
-index c2f9ddd..0000000
---- a/re2/re2/prefilter.h
-+++ /dev/null
-@@ -1,105 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Prefilter is the class used to extract string guards from regexps.
--// Rather than using Prefilter class directly, use FilteredRE2.
--// See filtered_re2.h
--
--#ifndef RE2_PREFILTER_H_
--#define RE2_PREFILTER_H_
--
--#include "util/util.h"
--
--namespace re2 {
--
--class RE2;
--
--class Regexp;
--
--class Prefilter {
-- // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
-- public:
-- enum Op {
-- ALL = 0, // Everything matches
-- NONE, // Nothing matches
-- ATOM, // The string atom() must match
-- AND, // All in subs() must match
-- OR, // One of subs() must match
-- };
--
-- explicit Prefilter(Op op);
-- ~Prefilter();
--
-- Op op() { return op_; }
-- const string& atom() const { return atom_; }
-- void set_unique_id(int id) { unique_id_ = id; }
-- int unique_id() const { return unique_id_; }
--
-- // The children of the Prefilter node.
-- vector<Prefilter*>* subs() {
-- CHECK(op_ == AND || op_ == OR);
-- return subs_;
-- }
--
-- // Set the children vector. Prefilter takes ownership of subs and
-- // subs_ will be deleted when Prefilter is deleted.
-- void set_subs(vector<Prefilter*>* subs) { subs_ = subs; }
--
-- // Given a RE2, return a Prefilter. The caller takes ownership of
-- // the Prefilter and should deallocate it. Returns NULL if Prefilter
-- // cannot be formed.
-- static Prefilter* FromRE2(const RE2* re2);
--
-- // Returns a readable debug string of the prefilter.
-- string DebugString() const;
--
-- private:
-- class Info;
--
-- // Combines two prefilters together to create an AND. The passed
-- // Prefilters will be part of the returned Prefilter or deleted.
-- static Prefilter* And(Prefilter* a, Prefilter* b);
--
-- // Combines two prefilters together to create an OR. The passed
-- // Prefilters will be part of the returned Prefilter or deleted.
-- static Prefilter* Or(Prefilter* a, Prefilter* b);
--
-- // Generalized And/Or
-- static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
--
-- static Prefilter* FromRegexp(Regexp* a);
--
-- static Prefilter* FromString(const string& str);
--
-- static Prefilter* OrStrings(set<string>* ss);
--
-- static Info* BuildInfo(Regexp* re);
--
-- Prefilter* Simplify();
--
-- // Kind of Prefilter.
-- Op op_;
--
-- // Sub-matches for AND or OR Prefilter.
-- vector<Prefilter*>* subs_;
--
-- // Actual string to match in leaf node.
-- string atom_;
--
-- // If different prefilters have the same string atom, or if they are
-- // structurally the same (e.g., OR of same atom strings) they are
-- // considered the same unique nodes. This is the id for each unique
-- // node. This field is populated with a unique id for every node,
-- // and -1 for duplicate nodes.
-- int unique_id_;
--
-- // Used for debugging, helps in tracking memory leaks.
-- int alloc_id_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(Prefilter);
--};
--
--} // namespace re2
--
--#endif // RE2_PREFILTER_H_
-diff --git a/re2/re2/prefilter_tree.cc b/re2/re2/prefilter_tree.cc
-deleted file mode 100644
-index d8bc37a..0000000
---- a/re2/re2/prefilter_tree.cc
-+++ /dev/null
-@@ -1,398 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--#include "util/flags.h"
--#include "re2/prefilter.h"
--#include "re2/prefilter_tree.h"
--#include "re2/re2.h"
--
--DEFINE_int32(filtered_re2_min_atom_len,
-- 3,
-- "Strings less than this length are not stored as atoms");
--
--namespace re2 {
--
--PrefilterTree::PrefilterTree()
-- : compiled_(false) {
--}
--
--PrefilterTree::~PrefilterTree() {
-- for (int i = 0; i < prefilter_vec_.size(); i++)
-- delete prefilter_vec_[i];
--
-- for (int i = 0; i < entries_.size(); i++)
-- delete entries_[i].parents;
--}
--
--// Functions used for adding and Compiling prefilters to the
--// PrefilterTree.
--static bool KeepPart(Prefilter* prefilter, int level) {
-- if (prefilter == NULL)
-- return false;
--
-- switch (prefilter->op()) {
-- default:
-- LOG(DFATAL) << "Unexpected op in KeepPart: "
-- << prefilter->op();
-- return false;
--
-- case Prefilter::ALL:
-- return false;
--
-- case Prefilter::ATOM:
-- return prefilter->atom().size() >=
-- FLAGS_filtered_re2_min_atom_len;
--
-- case Prefilter::AND: {
-- int j = 0;
-- vector<Prefilter*>* subs = prefilter->subs();
-- for (int i = 0; i < subs->size(); i++)
-- if (KeepPart((*subs)[i], level + 1))
-- (*subs)[j++] = (*subs)[i];
-- else
-- delete (*subs)[i];
--
-- subs->resize(j);
-- return j > 0;
-- }
--
-- case Prefilter::OR:
-- for (int i = 0; i < prefilter->subs()->size(); i++)
-- if (!KeepPart((*prefilter->subs())[i], level + 1))
-- return false;
-- return true;
-- }
--}
--
--void PrefilterTree::Add(Prefilter *f) {
-- if (compiled_) {
-- LOG(DFATAL) << "Add after Compile.";
-- return;
-- }
-- if (f != NULL && !KeepPart(f, 0)) {
-- delete f;
-- f = NULL;
-- }
--
-- prefilter_vec_.push_back(f);
--}
--
--void PrefilterTree::Compile(vector<string>* atom_vec) {
-- if (compiled_) {
-- LOG(DFATAL) << "Compile after Compile.";
-- return;
-- }
--
-- // We do this check to support some legacy uses of
-- // PrefilterTree that call Compile before adding any regexps,
-- // and expect Compile not to have effect.
-- if (prefilter_vec_.empty())
-- return;
--
-- compiled_ = true;
--
-- AssignUniqueIds(atom_vec);
--
-- // Identify nodes that are too common among prefilters and are
-- // triggering too many parents. Then get rid of them if possible.
-- // Note that getting rid of a prefilter node simply means they are
-- // no longer necessary for their parent to trigger; that is, we do
-- // not miss out on any regexps triggering by getting rid of a
-- // prefilter node.
-- for (int i = 0; i < entries_.size(); i++) {
-- IntMap* parents = entries_[i].parents;
-- if (parents->size() > 8) {
-- // This one triggers too many things. If all the parents are AND
-- // nodes and have other things guarding them, then get rid of
-- // this trigger. TODO(vsri): Adjust the threshold appropriately,
-- // make it a function of total number of nodes?
-- bool have_other_guard = true;
-- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
-- have_other_guard = have_other_guard &&
-- (entries_[it->index()].propagate_up_at_count > 1);
--
-- if (have_other_guard) {
-- for (IntMap::iterator it = parents->begin();
-- it != parents->end(); ++it)
-- entries_[it->index()].propagate_up_at_count -= 1;
--
-- parents->clear(); // Forget the parents
-- }
-- }
-- }
--
-- PrintDebugInfo();
--}
--
--Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) {
-- string node_string = NodeString(node);
-- map<string, Prefilter*>::iterator iter = node_map_.find(node_string);
-- if (iter == node_map_.end())
-- return NULL;
-- return (*iter).second;
--}
--
--static string Itoa(int n) {
-- char buf[100];
-- snprintf(buf, sizeof buf, "%d", n);
-- return string(buf);
--}
--
--string PrefilterTree::NodeString(Prefilter* node) const {
-- // Adding the operation disambiguates AND/OR/atom nodes.
-- string s = Itoa(node->op()) + ":";
-- if (node->op() == Prefilter::ATOM) {
-- s += node->atom();
-- } else {
-- for (int i = 0; i < node->subs()->size() ; i++) {
-- if (i > 0)
-- s += ',';
-- s += Itoa((*node->subs())[i]->unique_id());
-- }
-- }
-- return s;
--}
--
--void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
-- atom_vec->clear();
--
-- // Build vector of all filter nodes, sorted topologically
-- // from top to bottom in v.
-- vector<Prefilter*> v;
--
-- // Add the top level nodes of each regexp prefilter.
-- for (int i = 0; i < prefilter_vec_.size(); i++) {
-- Prefilter* f = prefilter_vec_[i];
-- if (f == NULL)
-- unfiltered_.push_back(i);
--
-- // We push NULL also on to v, so that we maintain the
-- // mapping of index==regexpid for level=0 prefilter nodes.
-- v.push_back(f);
-- }
--
-- // Now add all the descendant nodes.
-- for (int i = 0; i < v.size(); i++) {
-- Prefilter* f = v[i];
-- if (f == NULL)
-- continue;
-- if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) {
-- const vector<Prefilter*>& subs = *f->subs();
-- for (int j = 0; j < subs.size(); j++)
-- v.push_back(subs[j]);
-- }
-- }
--
-- // Identify unique nodes.
-- int unique_id = 0;
-- for (int i = v.size() - 1; i >= 0; i--) {
-- Prefilter *node = v[i];
-- if (node == NULL)
-- continue;
-- node->set_unique_id(-1);
-- Prefilter* canonical = CanonicalNode(node);
-- if (canonical == NULL) {
-- // Any further nodes that have the same node string
-- // will find this node as the canonical node.
-- node_map_[NodeString(node)] = node;
-- if (node->op() == Prefilter::ATOM) {
-- atom_vec->push_back(node->atom());
-- atom_index_to_id_.push_back(unique_id);
-- }
-- node->set_unique_id(unique_id++);
-- } else {
-- node->set_unique_id(canonical->unique_id());
-- }
-- }
-- entries_.resize(node_map_.size());
--
-- // Create parent IntMap for the entries.
-- for (int i = v.size() - 1; i >= 0; i--) {
-- Prefilter* prefilter = v[i];
-- if (prefilter == NULL)
-- continue;
--
-- if (CanonicalNode(prefilter) != prefilter)
-- continue;
--
-- Entry* entry = &entries_[prefilter->unique_id()];
-- entry->parents = new IntMap(node_map_.size());
-- }
--
-- // Fill the entries.
-- for (int i = v.size() - 1; i >= 0; i--) {
-- Prefilter* prefilter = v[i];
-- if (prefilter == NULL)
-- continue;
--
-- if (CanonicalNode(prefilter) != prefilter)
-- continue;
--
-- Entry* entry = &entries_[prefilter->unique_id()];
--
-- switch (prefilter->op()) {
-- default:
-- case Prefilter::ALL:
-- LOG(DFATAL) << "Unexpected op: " << prefilter->op();
-- return;
--
-- case Prefilter::ATOM:
-- entry->propagate_up_at_count = 1;
-- break;
--
-- case Prefilter::OR:
-- case Prefilter::AND: {
-- IntMap uniq_child(node_map_.size());
-- for (int j = 0; j < prefilter->subs()->size() ; j++) {
-- Prefilter* child = (*prefilter->subs())[j];
-- Prefilter* canonical = CanonicalNode(child);
-- if (canonical == NULL) {
-- LOG(DFATAL) << "Null canonical node";
-- return;
-- }
-- int child_id = canonical->unique_id();
-- if (!uniq_child.has_index(child_id))
-- uniq_child.set_new(child_id, 1);
-- // To the child, we want to add to parent indices.
-- Entry* child_entry = &entries_[child_id];
-- if (!child_entry->parents->has_index(prefilter->unique_id()))
-- child_entry->parents->set_new(prefilter->unique_id(), 1);
-- }
-- entry->propagate_up_at_count =
-- prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
--
-- break;
-- }
-- }
-- }
--
-- // For top level nodes, populate regexp id.
-- for (int i = 0; i < prefilter_vec_.size(); i++) {
-- if (prefilter_vec_[i] == NULL)
-- continue;
-- int id = CanonicalNode(prefilter_vec_[i])->unique_id();
-- DCHECK_LE(0, id);
-- Entry* entry = &entries_[id];
-- entry->regexps.push_back(i);
-- }
--}
--
--// Functions for triggering during search.
--void PrefilterTree::RegexpsGivenStrings(
-- const vector<int>& matched_atoms,
-- vector<int>* regexps) const {
-- regexps->clear();
-- if (!compiled_) {
-- LOG(WARNING) << "Compile() not called";
-- for (int i = 0; i < prefilter_vec_.size(); ++i)
-- regexps->push_back(i);
-- } else {
-- if (!prefilter_vec_.empty()) {
-- IntMap regexps_map(prefilter_vec_.size());
-- vector<int> matched_atom_ids;
-- for (int j = 0; j < matched_atoms.size(); j++) {
-- matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
-- VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]];
-- }
-- PropagateMatch(matched_atom_ids, &regexps_map);
-- for (IntMap::iterator it = regexps_map.begin();
-- it != regexps_map.end();
-- ++it)
-- regexps->push_back(it->index());
--
-- regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
-- }
-- }
-- sort(regexps->begin(), regexps->end());
--}
--
--void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
-- IntMap* regexps) const {
-- IntMap count(entries_.size());
-- IntMap work(entries_.size());
-- for (int i = 0; i < atom_ids.size(); i++)
-- work.set(atom_ids[i], 1);
-- for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
-- const Entry& entry = entries_[it->index()];
-- VLOG(10) << "Processing: " << it->index();
-- // Record regexps triggered.
-- for (int i = 0; i < entry.regexps.size(); i++) {
-- VLOG(10) << "Regexp triggered: " << entry.regexps[i];
-- regexps->set(entry.regexps[i], 1);
-- }
-- int c;
-- // Pass trigger up to parents.
-- for (IntMap::iterator it = entry.parents->begin();
-- it != entry.parents->end();
-- ++it) {
-- int j = it->index();
-- const Entry& parent = entries_[j];
-- VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
-- // Delay until all the children have succeeded.
-- if (parent.propagate_up_at_count > 1) {
-- if (count.has_index(j)) {
-- c = count.get_existing(j) + 1;
-- count.set_existing(j, c);
-- } else {
-- c = 1;
-- count.set_new(j, c);
-- }
-- if (c < parent.propagate_up_at_count)
-- continue;
-- }
-- VLOG(10) << "Triggering: " << j;
-- // Trigger the parent.
-- work.set(j, 1);
-- }
-- }
--}
--
--// Debugging help.
--void PrefilterTree::PrintPrefilter(int regexpid) {
-- LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]);
--}
--
--void PrefilterTree::PrintDebugInfo() {
-- VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size();
-- VLOG(10) << "#Unique Nodes: " << entries_.size();
--
-- for (int i = 0; i < entries_.size(); ++i) {
-- IntMap* parents = entries_[i].parents;
-- const vector<int>& regexps = entries_[i].regexps;
-- VLOG(10) << "EntryId: " << i
-- << " N: " << parents->size() << " R: " << regexps.size();
-- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
-- VLOG(10) << it->index();
-- }
-- VLOG(10) << "Map:";
-- for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
-- iter != node_map_.end(); ++iter)
-- VLOG(10) << "NodeId: " << (*iter).second->unique_id()
-- << " Str: " << (*iter).first;
--}
--
--string PrefilterTree::DebugNodeString(Prefilter* node) const {
-- string node_string = "";
--
-- if (node->op() == Prefilter::ATOM) {
-- DCHECK(!node->atom().empty());
-- node_string += node->atom();
-- } else {
-- // Adding the operation disambiguates AND and OR nodes.
-- node_string += node->op() == Prefilter::AND ? "AND" : "OR";
-- node_string += "(";
-- for (int i = 0; i < node->subs()->size() ; i++) {
-- if (i > 0)
-- node_string += ',';
-- node_string += Itoa((*node->subs())[i]->unique_id());
-- node_string += ":";
-- node_string += DebugNodeString((*node->subs())[i]);
-- }
-- node_string += ")";
-- }
-- return node_string;
--}
--
--} // namespace re2
-diff --git a/re2/re2/prefilter_tree.h b/re2/re2/prefilter_tree.h
-deleted file mode 100644
-index 596b734..0000000
---- a/re2/re2/prefilter_tree.h
-+++ /dev/null
-@@ -1,130 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// The PrefilterTree class is used to form an AND-OR tree of strings
--// that would trigger each regexp. The 'prefilter' of each regexp is
--// added tp PrefilterTree, and then PrefilterTree is used to find all
--// the unique strings across the prefilters. During search, by using
--// matches from a string matching engine, PrefilterTree deduces the
--// set of regexps that are to be triggered. The 'string matching
--// engine' itself is outside of this class, and the caller can use any
--// favorite engine. PrefilterTree provides a set of strings (called
--// atoms) that the user of this class should use to do the string
--// matching.
--//
--#ifndef RE2_PREFILTER_TREE_H_
--#define RE2_PREFILTER_TREE_H_
--
--#include "util/util.h"
--#include "util/sparse_array.h"
--
--namespace re2 {
--
--typedef SparseArray<int> IntMap;
--
--class Prefilter;
--
--class PrefilterTree {
-- public:
-- PrefilterTree();
-- ~PrefilterTree();
--
-- // Adds the prefilter for the next regexp. Note that we assume that
-- // Add called sequentially for all regexps. All Add calls
-- // must precede Compile.
-- void Add(Prefilter* prefilter);
--
-- // The Compile returns a vector of string in atom_vec.
-- // Call this after all the prefilters are added through Add.
-- // No calls to Add after Compile are allowed.
-- // The caller should use the returned set of strings to do string matching.
-- // Each time a string matches, the corresponding index then has to be
-- // and passed to RegexpsGivenStrings below.
-- void Compile(vector<string>* atom_vec);
--
-- // Given the indices of the atoms that matched, returns the indexes
-- // of regexps that should be searched. The matched_atoms should
-- // contain all the ids of string atoms that were found to match the
-- // content. The caller can use any string match engine to perform
-- // this function. This function is thread safe.
-- void RegexpsGivenStrings(const vector<int>& matched_atoms,
-- vector<int>* regexps) const;
--
-- // Print debug prefilter. Also prints unique ids associated with
-- // nodes of the prefilter of the regexp.
-- void PrintPrefilter(int regexpid);
--
--
-- // Each unique node has a corresponding Entry that helps in
-- // passing the matching trigger information along the tree.
-- struct Entry {
-- public:
-- // How many children should match before this node triggers the
-- // parent. For an atom and an OR node, this is 1 and for an AND
-- // node, it is the number of unique children.
-- int propagate_up_at_count;
--
-- // When this node is ready to trigger the parent, what are the indices
-- // of the parent nodes to trigger. The reason there may be more than
-- // one is because of sharing. For example (abc | def) and (xyz | def)
-- // are two different nodes, but they share the atom 'def'. So when
-- // 'def' matches, it triggers two parents, corresponding to the two
-- // different OR nodes.
-- IntMap* parents;
--
-- // When this node is ready to trigger the parent, what are the
-- // regexps that are triggered.
-- vector<int> regexps;
-- };
--
-- private:
-- // This function assigns unique ids to various parts of the
-- // prefilter, by looking at if these nodes are already in the
-- // PrefilterTree.
-- void AssignUniqueIds(vector<string>* atom_vec);
--
-- // Given the matching atoms, find the regexps to be triggered.
-- void PropagateMatch(const vector<int>& atom_ids,
-- IntMap* regexps) const;
--
-- // Returns the prefilter node that has the same NodeString as this
-- // node. For the canonical node, returns node.
-- Prefilter* CanonicalNode(Prefilter* node);
--
-- // A string that uniquely identifies the node. Assumes that the
-- // children of node has already been assigned unique ids.
-- string NodeString(Prefilter* node) const;
--
-- // Recursively constructs a readable prefilter string.
-- string DebugNodeString(Prefilter* node) const;
--
-- // Used for debugging.
-- void PrintDebugInfo();
--
-- // These are all the nodes formed by Compile. Essentially, there is
-- // one node for each unique atom and each unique AND/OR node.
-- vector<Entry> entries_;
--
-- // Map node string to canonical Prefilter node.
-- map<string, Prefilter*> node_map_;
--
-- // indices of regexps that always pass through the filter (since we
-- // found no required literals in these regexps).
-- vector<int> unfiltered_;
--
-- // vector of Prefilter for all regexps.
-- vector<Prefilter*> prefilter_vec_;
--
-- // Atom index in returned strings to entry id mapping.
-- vector<int> atom_index_to_id_;
--
-- // Has the prefilter tree been compiled.
-- bool compiled_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(PrefilterTree);
--};
--
--} // namespace
--
--#endif // RE2_PREFILTER_TREE_H_
-diff --git a/re2/re2/prog.cc b/re2/re2/prog.cc
-deleted file mode 100644
-index ef9ef23..0000000
---- a/re2/re2/prog.cc
-+++ /dev/null
-@@ -1,341 +0,0 @@
--// Copyright 2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Compiled regular expression representation.
--// Tested by compile_test.cc
--
--#include "util/util.h"
--#include "util/sparse_set.h"
--#include "re2/prog.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--// Constructors per Inst opcode
--
--void Prog::Inst::InitAlt(uint32 out, uint32 out1) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_out_opcode(out, kInstAlt);
-- out1_ = out1;
--}
--
--void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_out_opcode(out, kInstByteRange);
-- lo_ = lo & 0xFF;
-- hi_ = hi & 0xFF;
-- foldcase_ = foldcase;
--}
--
--void Prog::Inst::InitCapture(int cap, uint32 out) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_out_opcode(out, kInstCapture);
-- cap_ = cap;
--}
--
--void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_out_opcode(out, kInstEmptyWidth);
-- empty_ = empty;
--}
--
--void Prog::Inst::InitMatch(int32 id) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_opcode(kInstMatch);
-- match_id_ = id;
--}
--
--void Prog::Inst::InitNop(uint32 out) {
-- DCHECK_EQ(out_opcode_, 0);
-- set_opcode(kInstNop);
--}
--
--void Prog::Inst::InitFail() {
-- DCHECK_EQ(out_opcode_, 0);
-- set_opcode(kInstFail);
--}
--
--string Prog::Inst::Dump() {
-- switch (opcode()) {
-- default:
-- return StringPrintf("opcode %d", static_cast<int>(opcode()));
--
-- case kInstAlt:
-- return StringPrintf("alt -> %d | %d", out(), out1_);
--
-- case kInstAltMatch:
-- return StringPrintf("altmatch -> %d | %d", out(), out1_);
--
-- case kInstByteRange:
-- return StringPrintf("byte%s [%02x-%02x] -> %d",
-- foldcase_ ? "/i" : "",
-- lo_, hi_, out());
--
-- case kInstCapture:
-- return StringPrintf("capture %d -> %d", cap_, out());
--
-- case kInstEmptyWidth:
-- return StringPrintf("emptywidth %#x -> %d",
-- static_cast<int>(empty_), out());
--
-- case kInstMatch:
-- return StringPrintf("match! %d", match_id());
--
-- case kInstNop:
-- return StringPrintf("nop -> %d", out());
--
-- case kInstFail:
-- return StringPrintf("fail");
-- }
--}
--
--Prog::Prog()
-- : anchor_start_(false),
-- anchor_end_(false),
-- reversed_(false),
-- did_onepass_(false),
-- start_(0),
-- start_unanchored_(0),
-- size_(0),
-- byte_inst_count_(0),
-- bytemap_range_(0),
-- flags_(0),
-- onepass_statesize_(0),
-- inst_(NULL),
-- dfa_first_(NULL),
-- dfa_longest_(NULL),
-- dfa_mem_(0),
-- delete_dfa_(NULL),
-- unbytemap_(NULL),
-- onepass_nodes_(NULL),
-- onepass_start_(NULL) {
--}
--
--Prog::~Prog() {
-- if (delete_dfa_) {
-- if (dfa_first_)
-- delete_dfa_(dfa_first_);
-- if (dfa_longest_)
-- delete_dfa_(dfa_longest_);
-- }
-- delete[] onepass_nodes_;
-- delete[] inst_;
-- delete[] unbytemap_;
--}
--
--typedef SparseSet Workq;
--
--static inline void AddToQueue(Workq* q, int id) {
-- if (id != 0)
-- q->insert(id);
--}
--
--static string ProgToString(Prog* prog, Workq* q) {
-- string s;
--
-- for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
-- int id = *i;
-- Prog::Inst* ip = prog->inst(id);
-- StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str());
-- AddToQueue(q, ip->out());
-- if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
-- AddToQueue(q, ip->out1());
-- }
-- return s;
--}
--
--string Prog::Dump() {
-- string map;
-- if (false) { // Debugging
-- int lo = 0;
-- StringAppendF(&map, "byte map:\n");
-- for (int i = 0; i < bytemap_range_; i++) {
-- StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]);
-- lo = unbytemap_[i] + 1;
-- }
-- StringAppendF(&map, "\n");
-- }
--
-- Workq q(size_);
-- AddToQueue(&q, start_);
-- return map + ProgToString(this, &q);
--}
--
--string Prog::DumpUnanchored() {
-- Workq q(size_);
-- AddToQueue(&q, start_unanchored_);
-- return ProgToString(this, &q);
--}
--
--static bool IsMatch(Prog*, Prog::Inst*);
--
--// Peep-hole optimizer.
--void Prog::Optimize() {
-- Workq q(size_);
--
-- // Eliminate nops. Most are taken out during compilation
-- // but a few are hard to avoid.
-- q.clear();
-- AddToQueue(&q, start_);
-- for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
-- int id = *i;
--
-- Inst* ip = inst(id);
-- int j = ip->out();
-- Inst* jp;
-- while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
-- j = jp->out();
-- }
-- ip->set_out(j);
-- AddToQueue(&q, ip->out());
--
-- if (ip->opcode() == kInstAlt) {
-- j = ip->out1();
-- while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
-- j = jp->out();
-- }
-- ip->out1_ = j;
-- AddToQueue(&q, ip->out1());
-- }
-- }
--
-- // Insert kInstAltMatch instructions
-- // Look for
-- // ip: Alt -> j | k
-- // j: ByteRange [00-FF] -> ip
-- // k: Match
-- // or the reverse (the above is the greedy one).
-- // Rewrite Alt to AltMatch.
-- q.clear();
-- AddToQueue(&q, start_);
-- for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
-- int id = *i;
-- Inst* ip = inst(id);
-- AddToQueue(&q, ip->out());
-- if (ip->opcode() == kInstAlt)
-- AddToQueue(&q, ip->out1());
--
-- if (ip->opcode() == kInstAlt) {
-- Inst* j = inst(ip->out());
-- Inst* k = inst(ip->out1());
-- if (j->opcode() == kInstByteRange && j->out() == id &&
-- j->lo() == 0x00 && j->hi() == 0xFF &&
-- IsMatch(this, k)) {
-- ip->set_opcode(kInstAltMatch);
-- continue;
-- }
-- if (IsMatch(this, j) &&
-- k->opcode() == kInstByteRange && k->out() == id &&
-- k->lo() == 0x00 && k->hi() == 0xFF) {
-- ip->set_opcode(kInstAltMatch);
-- }
-- }
-- }
--}
--
--// Is ip a guaranteed match at end of text, perhaps after some capturing?
--static bool IsMatch(Prog* prog, Prog::Inst* ip) {
-- for (;;) {
-- switch (ip->opcode()) {
-- default:
-- LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
-- return false;
--
-- case kInstAlt:
-- case kInstAltMatch:
-- case kInstByteRange:
-- case kInstFail:
-- case kInstEmptyWidth:
-- return false;
--
-- case kInstCapture:
-- case kInstNop:
-- ip = prog->inst(ip->out());
-- break;
--
-- case kInstMatch:
-- return true;
-- }
-- }
--}
--
--uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {
-- int flags = 0;
--
-- // ^ and \A
-- if (p == text.begin())
-- flags |= kEmptyBeginText | kEmptyBeginLine;
-- else if (p[-1] == '\n')
-- flags |= kEmptyBeginLine;
--
-- // $ and \z
-- if (p == text.end())
-- flags |= kEmptyEndText | kEmptyEndLine;
-- else if (p < text.end() && p[0] == '\n')
-- flags |= kEmptyEndLine;
--
-- // \b and \B
-- if (p == text.begin() && p == text.end()) {
-- // no word boundary here
-- } else if (p == text.begin()) {
-- if (IsWordChar(p[0]))
-- flags |= kEmptyWordBoundary;
-- } else if (p == text.end()) {
-- if (IsWordChar(p[-1]))
-- flags |= kEmptyWordBoundary;
-- } else {
-- if (IsWordChar(p[-1]) != IsWordChar(p[0]))
-- flags |= kEmptyWordBoundary;
-- }
-- if (!(flags & kEmptyWordBoundary))
-- flags |= kEmptyNonWordBoundary;
--
-- return flags;
--}
--
--void Prog::MarkByteRange(int lo, int hi) {
-- CHECK_GE(lo, 0);
-- CHECK_GE(hi, 0);
-- CHECK_LE(lo, 255);
-- CHECK_LE(hi, 255);
-- if (lo > 0)
-- byterange_.Set(lo - 1);
-- byterange_.Set(hi);
--}
--
--void Prog::ComputeByteMap() {
-- // Fill in bytemap with byte classes for prog_.
-- // Ranges of bytes that are treated as indistinguishable
-- // by the regexp program are mapped to a single byte class.
-- // The vector prog_->byterange() marks the end of each
-- // such range.
-- const Bitmap<256>& v = byterange();
--
-- COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);
-- uint8 n = 0;
-- uint32 bits = 0;
-- for (int i = 0; i < 256; i++) {
-- if ((i&31) == 0)
-- bits = v.Word(i >> 5);
-- bytemap_[i] = n;
-- n += bits & 1;
-- bits >>= 1;
-- }
-- bytemap_range_ = bytemap_[255] + 1;
-- unbytemap_ = new uint8[bytemap_range_];
-- for (int i = 0; i < 256; i++)
-- unbytemap_[bytemap_[i]] = i;
--
-- if (0) { // For debugging: use trivial byte map.
-- for (int i = 0; i < 256; i++) {
-- bytemap_[i] = i;
-- unbytemap_[i] = i;
-- }
-- bytemap_range_ = 256;
-- LOG(INFO) << "Using trivial bytemap.";
-- }
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/prog.h b/re2/re2/prog.h
-deleted file mode 100644
-index 2cf65bc..0000000
---- a/re2/re2/prog.h
-+++ /dev/null
-@@ -1,376 +0,0 @@
--// Copyright 2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Compiled representation of regular expressions.
--// See regexp.h for the Regexp class, which represents a regular
--// expression symbolically.
--
--#ifndef RE2_PROG_H__
--#define RE2_PROG_H__
--
--#include "util/util.h"
--#include "re2/re2.h"
--
--namespace re2 {
--
--// Simple fixed-size bitmap.
--template<int Bits>
--class Bitmap {
-- public:
-- Bitmap() { Reset(); }
-- int Size() { return Bits; }
--
-- void Reset() {
-- for (int i = 0; i < Words; i++)
-- w_[i] = 0;
-- }
-- bool Get(int k) const {
-- return w_[k >> WordLog] & (1<<(k & 31));
-- }
-- void Set(int k) {
-- w_[k >> WordLog] |= 1<<(k & 31);
-- }
-- void Clear(int k) {
-- w_[k >> WordLog] &= ~(1<<(k & 31));
-- }
-- uint32 Word(int i) const {
-- return w_[i];
-- }
--
-- private:
-- static const int WordLog = 5;
-- static const int Words = (Bits+31)/32;
-- uint32 w_[Words];
-- DISALLOW_EVIL_CONSTRUCTORS(Bitmap);
--};
--
--
--// Opcodes for Inst
--enum InstOp {
-- kInstAlt = 0, // choose between out_ and out1_
-- kInstAltMatch, // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
-- kInstByteRange, // next (possible case-folded) byte must be in [lo_, hi_]
-- kInstCapture, // capturing parenthesis number cap_
-- kInstEmptyWidth, // empty-width special (^ $ ...); bit(s) set in empty_
-- kInstMatch, // found a match!
-- kInstNop, // no-op; occasionally unavoidable
-- kInstFail, // never match; occasionally unavoidable
--};
--
--// Bit flags for empty-width specials
--enum EmptyOp {
-- kEmptyBeginLine = 1<<0, // ^ - beginning of line
-- kEmptyEndLine = 1<<1, // $ - end of line
-- kEmptyBeginText = 1<<2, // \A - beginning of text
-- kEmptyEndText = 1<<3, // \z - end of text
-- kEmptyWordBoundary = 1<<4, // \b - word boundary
-- kEmptyNonWordBoundary = 1<<5, // \B - not \b
-- kEmptyAllFlags = (1<<6)-1,
--};
--
--class Regexp;
--
--class DFA;
--struct OneState;
--
--// Compiled form of regexp program.
--class Prog {
-- public:
-- Prog();
-- ~Prog();
--
-- // Single instruction in regexp program.
-- class Inst {
-- public:
-- Inst() : out_opcode_(0), out1_(0) { }
--
-- // Constructors per opcode
-- void InitAlt(uint32 out, uint32 out1);
-- void InitByteRange(int lo, int hi, int foldcase, uint32 out);
-- void InitCapture(int cap, uint32 out);
-- void InitEmptyWidth(EmptyOp empty, uint32 out);
-- void InitMatch(int id);
-- void InitNop(uint32 out);
-- void InitFail();
--
-- // Getters
-- int id(Prog* p) { return this - p->inst_; }
-- InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
-- int out() { return out_opcode_>>3; }
-- int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
-- int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
-- int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
-- int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
-- int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; }
-- int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
-- EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
-- bool greedy(Prog *p) {
-- DCHECK_EQ(opcode(), kInstAltMatch);
-- return p->inst(out())->opcode() == kInstByteRange;
-- }
--
-- // Does this inst (an kInstByteRange) match c?
-- inline bool Matches(int c) {
-- DCHECK_EQ(opcode(), kInstByteRange);
-- if (foldcase_ && 'A' <= c && c <= 'Z')
-- c += 'a' - 'A';
-- return lo_ <= c && c <= hi_;
-- }
--
-- // Returns string representation for debugging.
-- string Dump();
--
-- // Maximum instruction id.
-- // (Must fit in out_opcode_, and PatchList steals another bit.)
-- static const int kMaxInst = (1<<28) - 1;
--
-- private:
-- void set_opcode(InstOp opcode) {
-- out_opcode_ = (out()<<3) | opcode;
-- }
--
-- void set_out(int out) {
-- out_opcode_ = (out<<3) | opcode();
-- }
--
-- void set_out_opcode(int out, InstOp opcode) {
-- out_opcode_ = (out<<3) | opcode;
-- }
--
-- uint32 out_opcode_; // 29 bits of out, 3 (low) bits opcode
-- union { // additional instruction arguments:
-- uint32 out1_; // opcode == kInstAlt
-- // alternate next instruction
--
-- int32 cap_; // opcode == kInstCapture
-- // Index of capture register (holds text
-- // position recorded by capturing parentheses).
-- // For \n (the submatch for the nth parentheses),
-- // the left parenthesis captures into register 2*n
-- // and the right one captures into register 2*n+1.
--
-- int32 match_id_; // opcode == kInstMatch
-- // Match ID to identify this match (for re2::Set).
--
-- struct { // opcode == kInstByteRange
-- uint8 lo_; // byte range is lo_-hi_ inclusive
-- uint8 hi_; //
-- uint8 foldcase_; // convert A-Z to a-z before checking range.
-- };
--
-- EmptyOp empty_; // opcode == kInstEmptyWidth
-- // empty_ is bitwise OR of kEmpty* flags above.
-- };
--
-- friend class Compiler;
-- friend struct PatchList;
-- friend class Prog;
--
-- DISALLOW_EVIL_CONSTRUCTORS(Inst);
-- };
--
-- // Whether to anchor the search.
-- enum Anchor {
-- kUnanchored, // match anywhere
-- kAnchored, // match only starting at beginning of text
-- };
--
-- // Kind of match to look for (for anchor != kFullMatch)
-- //
-- // kLongestMatch mode finds the overall longest
-- // match but still makes its submatch choices the way
-- // Perl would, not in the way prescribed by POSIX.
-- // The POSIX rules are much more expensive to implement,
-- // and no one has needed them.
-- //
-- // kFullMatch is not strictly necessary -- we could use
-- // kLongestMatch and then check the length of the match -- but
-- // the matching code can run faster if it knows to consider only
-- // full matches.
-- enum MatchKind {
-- kFirstMatch, // like Perl, PCRE
-- kLongestMatch, // like egrep or POSIX
-- kFullMatch, // match only entire text; implies anchor==kAnchored
-- kManyMatch // for SearchDFA, records set of matches
-- };
--
-- Inst *inst(int id) { return &inst_[id]; }
-- int start() { return start_; }
-- int start_unanchored() { return start_unanchored_; }
-- void set_start(int start) { start_ = start; }
-- void set_start_unanchored(int start) { start_unanchored_ = start; }
-- int64 size() { return size_; }
-- bool reversed() { return reversed_; }
-- void set_reversed(bool reversed) { reversed_ = reversed; }
-- int64 byte_inst_count() { return byte_inst_count_; }
-- const Bitmap<256>& byterange() { return byterange_; }
-- void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; }
-- int64 dfa_mem() { return dfa_mem_; }
-- int flags() { return flags_; }
-- void set_flags(int flags) { flags_ = flags; }
-- bool anchor_start() { return anchor_start_; }
-- void set_anchor_start(bool b) { anchor_start_ = b; }
-- bool anchor_end() { return anchor_end_; }
-- void set_anchor_end(bool b) { anchor_end_ = b; }
-- int bytemap_range() { return bytemap_range_; }
-- const uint8* bytemap() { return bytemap_; }
--
-- // Returns string representation of program for debugging.
-- string Dump();
-- string DumpUnanchored();
--
-- // Record that at some point in the prog, the bytes in the range
-- // lo-hi (inclusive) are treated as different from bytes outside the range.
-- // Tracking this lets the DFA collapse commonly-treated byte ranges
-- // when recording state pointers, greatly reducing its memory footprint.
-- void MarkByteRange(int lo, int hi);
--
-- // Returns the set of kEmpty flags that are in effect at
-- // position p within context.
-- static uint32 EmptyFlags(const StringPiece& context, const char* p);
--
-- // Returns whether byte c is a word character: ASCII only.
-- // Used by the implementation of \b and \B.
-- // This is not right for Unicode, but:
-- // - it's hard to get right in a byte-at-a-time matching world
-- // (the DFA has only one-byte lookahead).
-- // - even if the lookahead were possible, the Progs would be huge.
-- // This crude approximation is the same one PCRE uses.
-- static bool IsWordChar(uint8 c) {
-- return ('A' <= c && c <= 'Z') ||
-- ('a' <= c && c <= 'z') ||
-- ('0' <= c && c <= '9') ||
-- c == '_';
-- }
--
-- // Execution engines. They all search for the regexp (run the prog)
-- // in text, which is in the larger context (used for ^ $ \b etc).
-- // Anchor and kind control the kind of search.
-- // Returns true if match found, false if not.
-- // If match found, fills match[0..nmatch-1] with submatch info.
-- // match[0] is overall match, match[1] is first set of parens, etc.
-- // If a particular submatch is not matched during the regexp match,
-- // it is set to NULL.
-- //
-- // Matching text == StringPiece(NULL, 0) is treated as any other empty
-- // string, but note that on return, it will not be possible to distinguish
-- // submatches that matched that empty string from submatches that didn't
-- // match anything. Either way, match[i] == NULL.
--
-- // Search using NFA: can find submatches but kind of slow.
-- bool SearchNFA(const StringPiece& text, const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch);
--
-- // Search using DFA: much faster than NFA but only finds
-- // end of match and can use a lot more memory.
-- // Returns whether a match was found.
-- // If the DFA runs out of memory, sets *failed to true and returns false.
-- // If matches != NULL and kind == kManyMatch and there is a match,
-- // SearchDFA fills matches with the match IDs of the final matching state.
-- bool SearchDFA(const StringPiece& text, const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match0, bool* failed,
-- vector<int>* matches);
--
-- // Build the entire DFA for the given match kind. FOR TESTING ONLY.
-- // Usually the DFA is built out incrementally, as needed, which
-- // avoids lots of unnecessary work. This function is useful only
-- // for testing purposes. Returns number of states.
-- int BuildEntireDFA(MatchKind kind);
--
-- // Compute byte map.
-- void ComputeByteMap();
--
-- // Run peep-hole optimizer on program.
-- void Optimize();
--
-- // One-pass NFA: only correct if IsOnePass() is true,
-- // but much faster than NFA (competitive with PCRE)
-- // for those expressions.
-- bool IsOnePass();
-- bool SearchOnePass(const StringPiece& text, const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch);
--
-- // Bit-state backtracking. Fast on small cases but uses memory
-- // proportional to the product of the program size and the text size.
-- bool SearchBitState(const StringPiece& text, const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch);
--
-- static const int kMaxOnePassCapture = 5; // $0 through $4
--
-- // Backtracking search: the gold standard against which the other
-- // implementations are checked. FOR TESTING ONLY.
-- // It allocates a ton of memory to avoid running forever.
-- // It is also recursive, so can't use in production (will overflow stacks).
-- // The name "Unsafe" here is supposed to be a flag that
-- // you should not be using this function.
-- bool UnsafeSearchBacktrack(const StringPiece& text,
-- const StringPiece& context,
-- Anchor anchor, MatchKind kind,
-- StringPiece* match, int nmatch);
--
-- // Computes range for any strings matching regexp. The min and max can in
-- // some cases be arbitrarily precise, so the caller gets to specify the
-- // maximum desired length of string returned.
-- //
-- // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
-- // string s that is an anchored match for this regexp satisfies
-- // min <= s && s <= max.
-- //
-- // Note that PossibleMatchRange() will only consider the first copy of an
-- // infinitely repeated element (i.e., any regexp element followed by a '*' or
-- // '+' operator). Regexps with "{N}" constructions are not affected, as those
-- // do not compile down to infinite repetitions.
-- //
-- // Returns true on success, false on error.
-- bool PossibleMatchRange(string* min, string* max, int maxlen);
--
-- // Compiles a collection of regexps to Prog. Each regexp will have
-- // its own Match instruction recording the index in the vector.
-- static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor,
-- Regexp* re);
--
-- private:
-- friend class Compiler;
--
-- DFA* GetDFA(MatchKind kind);
--
-- bool anchor_start_; // regexp has explicit start anchor
-- bool anchor_end_; // regexp has explicit end anchor
-- bool reversed_; // whether program runs backward over input
-- bool did_onepass_; // has IsOnePass been called?
--
-- int start_; // entry point for program
-- int start_unanchored_; // unanchored entry point for program
-- int size_; // number of instructions
-- int byte_inst_count_; // number of kInstByteRange instructions
-- int bytemap_range_; // bytemap_[x] < bytemap_range_
-- int flags_; // regexp parse flags
-- int onepass_statesize_; // byte size of each OneState* node
--
-- Inst* inst_; // pointer to instruction array
--
-- Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_
-- DFA* volatile dfa_first_; // DFA cached for kFirstMatch
-- DFA* volatile dfa_longest_; // DFA cached for kLongestMatch and kFullMatch
-- int64 dfa_mem_; // Maximum memory for DFAs.
-- void (*delete_dfa_)(DFA* dfa);
--
-- Bitmap<256> byterange_; // byterange.Get(x) true if x ends a
-- // commonly-treated byte range.
-- uint8 bytemap_[256]; // map from input bytes to byte classes
-- uint8 *unbytemap_; // bytemap_[unbytemap_[x]] == x
--
-- uint8* onepass_nodes_; // data for OnePass nodes
-- OneState* onepass_start_; // start node for OnePass program
--
-- DISALLOW_EVIL_CONSTRUCTORS(Prog);
--};
--
--} // namespace re2
--
--#endif // RE2_PROG_H__
-diff --git a/re2/re2/re2.cc b/re2/re2/re2.cc
-deleted file mode 100644
-index ddc952d..0000000
---- a/re2/re2/re2.cc
-+++ /dev/null
-@@ -1,1182 +0,0 @@
--// Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression interface RE2.
--//
--// Originally the PCRE C++ wrapper, but adapted to use
--// the new automata-based regular expression engines.
--
--#include "re2/re2.h"
--
--#include <stdio.h>
--#include <string>
--#ifndef WIN32
--#include <pthread.h>
--#endif
--#include <errno.h>
--#include "util/util.h"
--#include "util/flags.h"
--#include "re2/prog.h"
--#include "re2/regexp.h"
--
--DEFINE_bool(trace_re2, false, "trace RE2 execution");
--
--namespace re2 {
--
--// Maximum number of args we can set
--static const int kMaxArgs = 16;
--static const int kVecSize = 1+kMaxArgs;
--
--const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch;
--const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch;
--const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume;
--const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume;
--
--const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
--
--// Commonly-used option sets; arguments to constructor are:
--// utf8 input
--// posix syntax
--// longest match
--// log errors
--const RE2::Options RE2::DefaultOptions; // EncodingUTF8, false, false, true
--const RE2::Options RE2::Latin1(RE2::Options::EncodingLatin1, false, false, true);
--const RE2::Options RE2::POSIX(RE2::Options::EncodingUTF8, true, true, true);
--const RE2::Options RE2::Quiet(RE2::Options::EncodingUTF8, false, false, false);
--
--// If a regular expression has no error, its error_ field points here
--static const string empty_string;
--
--// Converts from Regexp error code to RE2 error code.
--// Maybe some day they will diverge. In any event, this
--// hides the existence of Regexp from RE2 users.
--static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
-- switch (code) {
-- case re2::kRegexpSuccess:
-- return RE2::NoError;
-- case re2::kRegexpInternalError:
-- return RE2::ErrorInternal;
-- case re2::kRegexpBadEscape:
-- return RE2::ErrorBadEscape;
-- case re2::kRegexpBadCharClass:
-- return RE2::ErrorBadCharClass;
-- case re2::kRegexpBadCharRange:
-- return RE2::ErrorBadCharRange;
-- case re2::kRegexpMissingBracket:
-- return RE2::ErrorMissingBracket;
-- case re2::kRegexpMissingParen:
-- return RE2::ErrorMissingParen;
-- case re2::kRegexpTrailingBackslash:
-- return RE2::ErrorTrailingBackslash;
-- case re2::kRegexpRepeatArgument:
-- return RE2::ErrorRepeatArgument;
-- case re2::kRegexpRepeatSize:
-- return RE2::ErrorRepeatSize;
-- case re2::kRegexpRepeatOp:
-- return RE2::ErrorRepeatOp;
-- case re2::kRegexpBadPerlOp:
-- return RE2::ErrorBadPerlOp;
-- case re2::kRegexpBadUTF8:
-- return RE2::ErrorBadUTF8;
-- case re2::kRegexpBadNamedCapture:
-- return RE2::ErrorBadNamedCapture;
-- }
-- return RE2::ErrorInternal;
--}
--
--static string trunc(const StringPiece& pattern) {
-- if (pattern.size() < 100)
-- return pattern.as_string();
-- return pattern.substr(0, 100).as_string() + "...";
--}
--
--
--RE2::RE2(const char* pattern) {
-- Init(pattern, DefaultOptions);
--}
--
--RE2::RE2(const string& pattern) {
-- Init(pattern, DefaultOptions);
--}
--
--RE2::RE2(const StringPiece& pattern) {
-- Init(pattern, DefaultOptions);
--}
--
--RE2::RE2(const StringPiece& pattern, const Options& options) {
-- Init(pattern, options);
--}
--
--int RE2::Options::ParseFlags() const {
-- int flags = Regexp::ClassNL;
-- switch (encoding()) {
-- default:
-- LOG(ERROR) << "Unknown encoding " << encoding();
-- break;
-- case RE2::Options::EncodingUTF8:
-- break;
-- case RE2::Options::EncodingLatin1:
-- flags |= Regexp::Latin1;
-- break;
-- }
--
-- if (!posix_syntax())
-- flags |= Regexp::LikePerl;
--
-- if (literal())
-- flags |= Regexp::Literal;
--
-- if (never_nl())
-- flags |= Regexp::NeverNL;
--
-- if (!case_sensitive())
-- flags |= Regexp::FoldCase;
--
-- if (perl_classes())
-- flags |= Regexp::PerlClasses;
--
-- if (word_boundary())
-- flags |= Regexp::PerlB;
--
-- if (one_line())
-- flags |= Regexp::OneLine;
--
-- return flags;
--}
--
--void RE2::Init(const StringPiece& pattern, const Options& options) {
-- mutex_ = new Mutex;
-- pattern_ = pattern.as_string();
-- options_.Copy(options);
-- error_ = &empty_string;
-- error_code_ = NoError;
-- suffix_regexp_ = NULL;
-- entire_regexp_ = NULL;
-- prog_ = NULL;
-- rprog_ = NULL;
-- named_groups_ = NULL;
-- group_names_ = NULL;
-- num_captures_ = -1;
--
-- RegexpStatus status;
-- entire_regexp_ = Regexp::Parse(
-- pattern_,
-- static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
-- &status);
-- if (entire_regexp_ == NULL) {
-- if (error_ == &empty_string)
-- error_ = new string(status.Text());
-- if (options_.log_errors()) {
-- LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
-- << status.Text();
-- }
-- error_arg_ = status.error_arg().as_string();
-- error_code_ = RegexpErrorToRE2(status.code());
-- return;
-- }
--
-- prefix_.clear();
-- prefix_foldcase_ = false;
-- re2::Regexp* suffix;
-- if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
-- suffix_regexp_ = suffix;
-- else
-- suffix_regexp_ = entire_regexp_->Incref();
--
-- // Two thirds of the memory goes to the forward Prog,
-- // one third to the reverse prog, because the forward
-- // Prog has two DFAs but the reverse prog has one.
-- prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
-- if (prog_ == NULL) {
-- if (options_.log_errors())
-- LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
-- error_ = new string("pattern too large - compile failed");
-- error_code_ = RE2::ErrorPatternTooLarge;
-- return;
-- }
--
-- // Could delay this until the first match call that
-- // cares about submatch information, but the one-pass
-- // machine's memory gets cut from the DFA memory budget,
-- // and that is harder to do if the DFA has already
-- // been built.
-- is_one_pass_ = prog_->IsOnePass();
--}
--
--// Returns rprog_, computing it if needed.
--re2::Prog* RE2::ReverseProg() const {
-- MutexLock l(mutex_);
-- if (rprog_ == NULL && error_ == &empty_string) {
-- rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
-- if (rprog_ == NULL) {
-- if (options_.log_errors())
-- LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
-- error_ = new string("pattern too large - reverse compile failed");
-- error_code_ = RE2::ErrorPatternTooLarge;
-- return NULL;
-- }
-- }
-- return rprog_;
--}
--
--static const map<string, int> empty_named_groups;
--static const map<int, string> empty_group_names;
--
--RE2::~RE2() {
-- if (suffix_regexp_)
-- suffix_regexp_->Decref();
-- if (entire_regexp_)
-- entire_regexp_->Decref();
-- delete mutex_;
-- delete prog_;
-- delete rprog_;
-- if (error_ != &empty_string)
-- delete error_;
-- if (named_groups_ != NULL && named_groups_ != &empty_named_groups)
-- delete named_groups_;
-- if (group_names_ != NULL && group_names_ != &empty_group_names)
-- delete group_names_;
--}
--
--int RE2::ProgramSize() const {
-- if (prog_ == NULL)
-- return -1;
-- return prog_->size();
--}
--
--// Returns named_groups_, computing it if needed.
--const map<string, int>& RE2::NamedCapturingGroups() const {
-- MutexLock l(mutex_);
-- if (!ok())
-- return empty_named_groups;
-- if (named_groups_ == NULL) {
-- named_groups_ = suffix_regexp_->NamedCaptures();
-- if (named_groups_ == NULL)
-- named_groups_ = &empty_named_groups;
-- }
-- return *named_groups_;
--}
--
--// Returns group_names_, computing it if needed.
--const map<int, string>& RE2::CapturingGroupNames() const {
-- MutexLock l(mutex_);
-- if (!ok())
-- return empty_group_names;
-- if (group_names_ == NULL) {
-- group_names_ = suffix_regexp_->CaptureNames();
-- if (group_names_ == NULL)
-- group_names_ = &empty_group_names;
-- }
-- return *group_names_;
--}
--
--/***** Convenience interfaces *****/
--
--bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
-- const Arg* const args[], int n) {
-- return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
--}
--
--bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
-- const Arg* const args[], int n) {
-- return re.DoMatch(text, UNANCHORED, NULL, args, n);
--}
--
--bool RE2::ConsumeN(StringPiece* input, const RE2& re,
-- const Arg* const args[], int n) {
-- int consumed;
-- if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
-- input->remove_prefix(consumed);
-- return true;
-- } else {
-- return false;
-- }
--}
--
--bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
-- const Arg* const args[], int n) {
-- int consumed;
-- if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
-- input->remove_prefix(consumed);
-- return true;
-- } else {
-- return false;
-- }
--}
--
--// Returns the maximum submatch needed for the rewrite to be done by Replace().
--// E.g. if rewrite == "foo \\2,\\1", returns 2.
--static int MaxSubmatch(const StringPiece& rewrite) {
-- int max = 0;
-- for (const char *s = rewrite.data(), *end = s + rewrite.size();
-- s < end; s++) {
-- if (*s == '\\') {
-- s++;
-- int c = (s < end) ? *s : -1;
-- if (isdigit(c)) {
-- int n = (c - '0');
-- if (n > max)
-- max = n;
-- }
-- }
-- }
-- return max;
--}
--
--bool RE2::Replace(string *str,
-- const RE2& re,
-- const StringPiece& rewrite) {
-- StringPiece vec[kVecSize];
-- int nvec = 1 + MaxSubmatch(rewrite);
-- if (nvec > arraysize(vec))
-- return false;
-- if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
-- return false;
--
-- string s;
-- if (!re.Rewrite(&s, rewrite, vec, nvec))
-- return false;
--
-- assert(vec[0].begin() >= str->data());
-- assert(vec[0].end() <= str->data()+str->size());
-- str->replace(vec[0].data() - str->data(), vec[0].size(), s);
-- return true;
--}
--
--int RE2::GlobalReplace(string *str,
-- const RE2& re,
-- const StringPiece& rewrite) {
-- StringPiece vec[kVecSize];
-- int nvec = 1 + MaxSubmatch(rewrite);
-- if (nvec > arraysize(vec))
-- return false;
--
-- const char* p = str->data();
-- const char* ep = p + str->size();
-- const char* lastend = NULL;
-- string out;
-- int count = 0;
-- while (p <= ep) {
-- if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
-- break;
-- if (p < vec[0].begin())
-- out.append(p, vec[0].begin() - p);
-- if (vec[0].begin() == lastend && vec[0].size() == 0) {
-- // Disallow empty match at end of last match: skip ahead.
-- if (p < ep)
-- out.append(p, 1);
-- p++;
-- continue;
-- }
-- re.Rewrite(&out, rewrite, vec, nvec);
-- p = vec[0].end();
-- lastend = p;
-- count++;
-- }
--
-- if (count == 0)
-- return 0;
--
-- if (p < ep)
-- out.append(p, ep - p);
-- swap(out, *str);
-- return count;
--}
--
--bool RE2::Extract(const StringPiece &text,
-- const RE2& re,
-- const StringPiece &rewrite,
-- string *out) {
-- StringPiece vec[kVecSize];
-- int nvec = 1 + MaxSubmatch(rewrite);
-- if (nvec > arraysize(vec))
-- return false;
--
-- if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
-- return false;
--
-- out->clear();
-- return re.Rewrite(out, rewrite, vec, nvec);
--}
--
--string RE2::QuoteMeta(const StringPiece& unquoted) {
-- string result;
-- result.reserve(unquoted.size() << 1);
--
-- // Escape any ascii character not in [A-Za-z_0-9].
-- //
-- // Note that it's legal to escape a character even if it has no
-- // special meaning in a regular expression -- so this function does
-- // that. (This also makes it identical to the perl function of the
-- // same name except for the null-character special case;
-- // see `perldoc -f quotemeta`.)
-- for (int ii = 0; ii < unquoted.length(); ++ii) {
-- // Note that using 'isalnum' here raises the benchmark time from
-- // 32ns to 58ns:
-- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
-- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
-- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
-- unquoted[ii] != '_' &&
-- // If this is the part of a UTF8 or Latin1 character, we need
-- // to copy this byte without escaping. Experimentally this is
-- // what works correctly with the regexp library.
-- !(unquoted[ii] & 128)) {
-- if (unquoted[ii] == '\0') { // Special handling for null chars.
-- // Note that this special handling is not strictly required for RE2,
-- // but this quoting is required for other regexp libraries such as
-- // PCRE.
-- // Can't use "\\0" since the next character might be a digit.
-- result += "\\x00";
-- continue;
-- }
-- result += '\\';
-- }
-- result += unquoted[ii];
-- }
--
-- return result;
--}
--
--bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
-- if (prog_ == NULL)
-- return false;
--
-- int n = prefix_.size();
-- if (n > maxlen)
-- n = maxlen;
--
-- // Determine initial min max from prefix_ literal.
-- string pmin, pmax;
-- pmin = prefix_.substr(0, n);
-- pmax = prefix_.substr(0, n);
-- if (prefix_foldcase_) {
-- // prefix is ASCII lowercase; change pmin to uppercase.
-- for (int i = 0; i < n; i++) {
-- if ('a' <= pmin[i] && pmin[i] <= 'z')
-- pmin[i] += 'A' - 'a';
-- }
-- }
--
-- // Add to prefix min max using PossibleMatchRange on regexp.
-- string dmin, dmax;
-- maxlen -= n;
-- if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
-- pmin += dmin;
-- pmax += dmax;
-- } else if (pmax.size() > 0) {
-- // prog_->PossibleMatchRange has failed us,
-- // but we still have useful information from prefix_.
-- // Round up pmax to allow any possible suffix.
-- pmax = PrefixSuccessor(pmax);
-- } else {
-- // Nothing useful.
-- *min = "";
-- *max = "";
-- return false;
-- }
--
-- *min = pmin;
-- *max = pmax;
-- return true;
--}
--
--// Avoid possible locale nonsense in standard strcasecmp.
--// The string a is known to be all lowercase.
--static int ascii_strcasecmp(const char* a, const char* b, int len) {
-- const char *ae = a + len;
--
-- for (; a < ae; a++, b++) {
-- uint8 x = *a;
-- uint8 y = *b;
-- if ('A' <= y && y <= 'Z')
-- y += 'a' - 'A';
-- if (x != y)
-- return x - y;
-- }
-- return 0;
--}
--
--
--/***** Actual matching and rewriting code *****/
--
--bool RE2::Match(const StringPiece& text,
-- int startpos,
-- int endpos,
-- Anchor re_anchor,
-- StringPiece* submatch,
-- int nsubmatch) const {
-- if (!ok() || suffix_regexp_ == NULL) {
-- if (options_.log_errors())
-- LOG(ERROR) << "Invalid RE2: " << *error_;
-- return false;
-- }
--
-- if (startpos < 0 || startpos > endpos || endpos > text.size()) {
-- LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
-- return false;
-- }
--
-- StringPiece subtext = text;
-- subtext.remove_prefix(startpos);
-- subtext.remove_suffix(text.size() - endpos);
--
-- // Use DFAs to find exact location of match, filter out non-matches.
--
-- // Don't ask for the location if we won't use it.
-- // SearchDFA can do extra optimizations in that case.
-- StringPiece match;
-- StringPiece* matchp = &match;
-- if (nsubmatch == 0)
-- matchp = NULL;
--
-- int ncap = 1 + NumberOfCapturingGroups();
-- if (ncap > nsubmatch)
-- ncap = nsubmatch;
--
-- // If the regexp is anchored explicitly, must not be in middle of text.
-- if (prog_->anchor_start() && startpos != 0)
-- return false;
--
-- // If the regexp is anchored explicitly, update re_anchor
-- // so that we can potentially fall into a faster case below.
-- if (prog_->anchor_start() && prog_->anchor_end())
-- re_anchor = ANCHOR_BOTH;
-- else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
-- re_anchor = ANCHOR_START;
--
-- // Check for the required prefix, if any.
-- int prefixlen = 0;
-- if (!prefix_.empty()) {
-- if (startpos != 0)
-- return false;
-- prefixlen = prefix_.size();
-- if (prefixlen > subtext.size())
-- return false;
-- if (prefix_foldcase_) {
-- if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
-- return false;
-- } else {
-- if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
-- return false;
-- }
-- subtext.remove_prefix(prefixlen);
-- // If there is a required prefix, the anchor must be at least ANCHOR_START.
-- if (re_anchor != ANCHOR_BOTH)
-- re_anchor = ANCHOR_START;
-- }
--
-- Prog::Anchor anchor = Prog::kUnanchored;
-- Prog::MatchKind kind = Prog::kFirstMatch;
-- if (options_.longest_match())
-- kind = Prog::kLongestMatch;
-- bool skipped_test = false;
--
-- bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
--
-- // SearchBitState allocates a bit vector of size prog_->size() * text.size().
-- // It also allocates a stack of 3-word structures which could potentially
-- // grow as large as prog_->size() * text.size() but in practice is much
-- // smaller.
-- // Conditions for using SearchBitState:
-- const int MaxBitStateProg = 500; // prog_->size() <= Max.
-- const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits)
-- bool can_bit_state = prog_->size() <= MaxBitStateProg;
-- int bit_state_text_max = MaxBitStateVector / prog_->size();
--
-- bool dfa_failed = false;
-- switch (re_anchor) {
-- default:
-- case UNANCHORED: {
-- if (!prog_->SearchDFA(subtext, text, anchor, kind,
-- matchp, &dfa_failed, NULL)) {
-- if (dfa_failed) {
-- // Fall back to NFA below.
-- skipped_test = true;
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " DFA failed.";
-- break;
-- }
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " used DFA - no match.";
-- return false;
-- }
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " used DFA - match";
-- if (matchp == NULL) // Matched. Don't care where
-- return true;
-- // SearchDFA set match[0].end() but didn't know where the
-- // match started. Run the regexp backward from match[0].end()
-- // to find the longest possible match -- that's where it started.
-- Prog* prog = ReverseProg();
-- if (prog == NULL)
-- return false;
-- if (!prog->SearchDFA(match, text, Prog::kAnchored,
-- Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
-- if (dfa_failed) {
-- // Fall back to NFA below.
-- skipped_test = true;
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " reverse DFA failed.";
-- break;
-- }
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " DFA inconsistency.";
-- LOG(ERROR) << "DFA inconsistency";
-- return false;
-- }
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " used reverse DFA.";
-- break;
-- }
--
-- case ANCHOR_BOTH:
-- case ANCHOR_START:
-- if (re_anchor == ANCHOR_BOTH)
-- kind = Prog::kFullMatch;
-- anchor = Prog::kAnchored;
--
-- // If only a small amount of text and need submatch
-- // information anyway and we're going to use OnePass or BitState
-- // to get it, we might as well not even bother with the DFA:
-- // OnePass or BitState will be fast enough.
-- // On tiny texts, OnePass outruns even the DFA, and
-- // it doesn't have the shared state and occasional mutex that
-- // the DFA does.
-- if (can_one_pass && text.size() <= 4096 &&
-- (ncap > 1 || text.size() <= 8)) {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " skipping DFA for OnePass.";
-- skipped_test = true;
-- break;
-- }
-- if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " skipping DFA for BitState.";
-- skipped_test = true;
-- break;
-- }
-- if (!prog_->SearchDFA(subtext, text, anchor, kind,
-- &match, &dfa_failed, NULL)) {
-- if (dfa_failed) {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " DFA failed.";
-- skipped_test = true;
-- break;
-- }
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " used DFA - no match.";
-- return false;
-- }
-- break;
-- }
--
-- if (!skipped_test && ncap <= 1) {
-- // We know exactly where it matches. That's enough.
-- if (ncap == 1)
-- submatch[0] = match;
-- } else {
-- StringPiece subtext1;
-- if (skipped_test) {
-- // DFA ran out of memory or was skipped:
-- // need to search in entire original text.
-- subtext1 = subtext;
-- } else {
-- // DFA found the exact match location:
-- // let NFA run an anchored, full match search
-- // to find submatch locations.
-- subtext1 = match;
-- anchor = Prog::kAnchored;
-- kind = Prog::kFullMatch;
-- }
--
-- if (can_one_pass && anchor != Prog::kUnanchored) {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " using OnePass.";
-- if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
-- if (!skipped_test)
-- LOG(ERROR) << "SearchOnePass inconsistency";
-- return false;
-- }
-- } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " using BitState.";
-- if (!prog_->SearchBitState(subtext1, text, anchor,
-- kind, submatch, ncap)) {
-- if (!skipped_test)
-- LOG(ERROR) << "SearchBitState inconsistency";
-- return false;
-- }
-- } else {
-- if (FLAGS_trace_re2)
-- LOG(INFO) << "Match " << trunc(pattern_)
-- << " [" << CEscape(subtext) << "]"
-- << " using NFA.";
-- if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
-- if (!skipped_test)
-- LOG(ERROR) << "SearchNFA inconsistency";
-- return false;
-- }
-- }
-- }
--
-- // Adjust overall match for required prefix that we stripped off.
-- if (prefixlen > 0 && nsubmatch > 0)
-- submatch[0] = StringPiece(submatch[0].begin() - prefixlen,
-- submatch[0].size() + prefixlen);
--
-- // Zero submatches that don't exist in the regexp.
-- for (int i = ncap; i < nsubmatch; i++)
-- submatch[i] = NULL;
-- return true;
--}
--
--// Internal matcher - like Match() but takes Args not StringPieces.
--bool RE2::DoMatch(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const* args,
-- int n) const {
-- if (!ok()) {
-- if (options_.log_errors())
-- LOG(ERROR) << "Invalid RE2: " << *error_;
-- return false;
-- }
--
-- // Count number of capture groups needed.
-- int nvec;
-- if (n == 0 && consumed == NULL)
-- nvec = 0;
-- else
-- nvec = n+1;
--
-- StringPiece* vec;
-- StringPiece stkvec[kVecSize];
-- StringPiece* heapvec = NULL;
--
-- if (nvec <= arraysize(stkvec)) {
-- vec = stkvec;
-- } else {
-- vec = new StringPiece[nvec];
-- heapvec = vec;
-- }
--
-- if (!Match(text, 0, text.size(), anchor, vec, nvec)) {
-- delete[] heapvec;
-- return false;
-- }
--
-- if(consumed != NULL)
-- *consumed = vec[0].end() - text.begin();
--
-- if (n == 0 || args == NULL) {
-- // We are not interested in results
-- delete[] heapvec;
-- return true;
-- }
--
-- int ncap = NumberOfCapturingGroups();
-- if (ncap < n) {
-- // RE has fewer capturing groups than number of arg pointers passed in
-- VLOG(1) << "Asked for " << n << " but only have " << ncap;
-- delete[] heapvec;
-- return false;
-- }
--
-- // If we got here, we must have matched the whole pattern.
-- for (int i = 0; i < n; i++) {
-- const StringPiece& s = vec[i+1];
-- if (!args[i]->Parse(s.data(), s.size())) {
-- // TODO: Should we indicate what the error was?
-- VLOG(1) << "Parse error on #" << i << " " << s << " "
-- << (void*)s.data() << "/" << s.size();
-- delete[] heapvec;
-- return false;
-- }
-- }
--
-- delete[] heapvec;
-- return true;
--}
--
--// Append the "rewrite" string, with backslash subsitutions from "vec",
--// to string "out".
--bool RE2::Rewrite(string *out, const StringPiece &rewrite,
-- const StringPiece *vec, int veclen) const {
-- for (const char *s = rewrite.data(), *end = s + rewrite.size();
-- s < end; s++) {
-- int c = *s;
-- if (c == '\\') {
-- s++;
-- c = (s < end) ? *s : -1;
-- if (isdigit(c)) {
-- int n = (c - '0');
-- if (n >= veclen) {
-- LOG(ERROR) << "requested group " << n
-- << " in regexp " << rewrite.data();
-- return false;
-- }
-- StringPiece snip = vec[n];
-- if (snip.size() > 0)
-- out->append(snip.data(), snip.size());
-- } else if (c == '\\') {
-- out->push_back('\\');
-- } else {
-- LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
-- return false;
-- }
-- } else {
-- out->push_back(c);
-- }
-- }
-- return true;
--}
--
--// Return the number of capturing subpatterns, or -1 if the
--// regexp wasn't valid on construction.
--int RE2::NumberOfCapturingGroups() const {
-- if (suffix_regexp_ == NULL)
-- return -1;
-- ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
-- " multiple threads end up doing the same work in parallel.");
-- if (num_captures_ == -1)
-- num_captures_ = suffix_regexp_->NumCaptures();
-- return num_captures_;
--}
--
--// Checks that the rewrite string is well-formed with respect to this
--// regular expression.
--bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
-- int max_token = -1;
-- for (const char *s = rewrite.data(), *end = s + rewrite.size();
-- s < end; s++) {
-- int c = *s;
-- if (c != '\\') {
-- continue;
-- }
-- if (++s == end) {
-- *error = "Rewrite schema error: '\\' not allowed at end.";
-- return false;
-- }
-- c = *s;
-- if (c == '\\') {
-- continue;
-- }
-- if (!isdigit(c)) {
-- *error = "Rewrite schema error: "
-- "'\\' must be followed by a digit or '\\'.";
-- return false;
-- }
-- int n = (c - '0');
-- if (max_token < n) {
-- max_token = n;
-- }
-- }
--
-- if (max_token > NumberOfCapturingGroups()) {
-- SStringPrintf(error, "Rewrite schema requests %d matches, "
-- "but the regexp only has %d parenthesized subexpressions.",
-- max_token, NumberOfCapturingGroups());
-- return false;
-- }
-- return true;
--}
--
--/***** Parsers for various types *****/
--
--bool RE2::Arg::parse_null(const char* str, int n, void* dest) {
-- // We fail if somebody asked us to store into a non-NULL void* pointer
-- return (dest == NULL);
--}
--
--bool RE2::Arg::parse_string(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- reinterpret_cast<string*>(dest)->assign(str, n);
-- return true;
--}
--
--bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- reinterpret_cast<StringPiece*>(dest)->set(str, n);
-- return true;
--}
--
--bool RE2::Arg::parse_char(const char* str, int n, void* dest) {
-- if (n != 1) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<char*>(dest)) = str[0];
-- return true;
--}
--
--bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
-- if (n != 1) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned char*>(dest)) = str[0];
-- return true;
--}
--
--// Largest number spec that we are willing to parse
--static const int kMaxNumberLength = 32;
--
--// REQUIRES "buf" must have length at least kMaxNumberLength+1
--// Copies "str" into "buf" and null-terminates.
--// Overwrites *np with the new length.
--static const char* TerminateNumber(char* buf, const char* str, int* np) {
-- int n = *np;
-- if (n <= 0) return "";
-- if (n > 0 && isspace(*str)) {
-- // We are less forgiving than the strtoxxx() routines and do not
-- // allow leading spaces.
-- return "";
-- }
--
-- // Although buf has a fixed maximum size, we can still handle
-- // arbitrarily large integers correctly by omitting leading zeros.
-- // (Numbers that are still too long will be out of range.)
-- // Before deciding whether str is too long,
-- // remove leading zeros with s/000+/00/.
-- // Leaving the leading two zeros in place means that
-- // we don't change 0000x123 (invalid) into 0x123 (valid).
-- // Skip over leading - before replacing.
-- bool neg = false;
-- if (n >= 1 && str[0] == '-') {
-- neg = true;
-- n--;
-- str++;
-- }
--
-- if (n >= 3 && str[0] == '0' && str[1] == '0') {
-- while (n >= 3 && str[2] == '0') {
-- n--;
-- str++;
-- }
-- }
--
-- if (neg) { // make room in buf for -
-- n++;
-- str--;
-- }
--
-- if (n > kMaxNumberLength) return "";
--
-- memmove(buf, str, n);
-- if (neg) {
-- buf[0] = '-';
-- }
-- buf[n] = '\0';
-- *np = n;
-- return buf;
--}
--
--bool RE2::Arg::parse_long_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, &n);
-- char* end;
-- errno = 0;
-- long r = strtol(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<long*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_ulong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, &n);
-- if (str[0] == '-') {
-- // strtoul() will silently accept negative numbers and parse
-- // them. This module is more strict and treats them as errors.
-- return false;
-- }
--
-- char* end;
-- errno = 0;
-- unsigned long r = strtoul(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned long*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_short_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- long r;
-- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((short)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<short*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_ushort_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- unsigned long r;
-- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((ushort)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned short*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_int_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- long r;
-- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((int)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<int*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_uint_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- unsigned long r;
-- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((uint)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned int*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_longlong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, &n);
-- char* end;
-- errno = 0;
-- int64 r = strtoll(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<int64*>(dest)) = r;
-- return true;
--}
--
--bool RE2::Arg::parse_ulonglong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, &n);
-- if (str[0] == '-') {
-- // strtoull() will silently accept negative numbers and parse
-- // them. This module is more strict and treats them as errors.
-- return false;
-- }
-- char* end;
-- errno = 0;
-- uint64 r = strtoull(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<uint64*>(dest)) = r;
-- return true;
--}
--
--static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
-- if (n == 0) return false;
-- static const int kMaxLength = 200;
-- char buf[kMaxLength];
-- if (n >= kMaxLength) return false;
-- memcpy(buf, str, n);
-- buf[n] = '\0';
-- errno = 0;
-- char* end;
-- double r;
-- if (isfloat) {
-- r = strtof(buf, &end);
-- } else {
-- r = strtod(buf, &end);
-- }
-- if (end != buf + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- if (isfloat) {
-- *(reinterpret_cast<float*>(dest)) = r;
-- } else {
-- *(reinterpret_cast<double*>(dest)) = r;
-- }
-- return true;
--}
--
--bool RE2::Arg::parse_double(const char* str, int n, void* dest) {
-- return parse_double_float(str, n, false, dest);
--}
--
--bool RE2::Arg::parse_float(const char* str, int n, void* dest) {
-- return parse_double_float(str, n, true, dest);
--}
--
--
--#define DEFINE_INTEGER_PARSERS(name) \
-- bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 10); \
-- } \
-- bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 16); \
-- } \
-- bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 8); \
-- } \
-- bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 0); \
-- }
--
--DEFINE_INTEGER_PARSERS(short);
--DEFINE_INTEGER_PARSERS(ushort);
--DEFINE_INTEGER_PARSERS(int);
--DEFINE_INTEGER_PARSERS(uint);
--DEFINE_INTEGER_PARSERS(long);
--DEFINE_INTEGER_PARSERS(ulong);
--DEFINE_INTEGER_PARSERS(longlong);
--DEFINE_INTEGER_PARSERS(ulonglong);
--
--#undef DEFINE_INTEGER_PARSERS
--
--} // namespace re2
-diff --git a/re2/re2/re2.h b/re2/re2/re2.h
-deleted file mode 100644
-index 9dbc99c..0000000
---- a/re2/re2/re2.h
-+++ /dev/null
-@@ -1,837 +0,0 @@
--// Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_RE2_H
--#define RE2_RE2_H
--
--// C++ interface to the re2 regular-expression library.
--// RE2 supports Perl-style regular expressions (with extensions like
--// \d, \w, \s, ...).
--//
--// -----------------------------------------------------------------------
--// REGEXP SYNTAX:
--//
--// This module uses the re2 library and hence supports
--// its syntax for regular expressions, which is similar to Perl's with
--// some of the more complicated things thrown away. In particular,
--// backreferences and generalized assertions are not available, nor is \Z.
--//
--// See http://code.google.com/p/re2/wiki/Syntax for the syntax
--// supported by RE2, and a comparison with PCRE and PERL regexps.
--//
--// For those not familiar with Perl's regular expressions,
--// here are some examples of the most commonly used extensions:
--//
--// "hello (\\w+) world" -- \w matches a "word" character
--// "version (\\d+)" -- \d matches a digit
--// "hello\\s+world" -- \s matches any whitespace character
--// "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary
--// "(?i)hello" -- (?i) turns on case-insensitive matching
--// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
--//
--// -----------------------------------------------------------------------
--// MATCHING INTERFACE:
--//
--// The "FullMatch" operation checks that supplied text matches a
--// supplied pattern exactly.
--//
--// Example: successful match
--// CHECK(RE2::FullMatch("hello", "h.*o"));
--//
--// Example: unsuccessful match (requires full match):
--// CHECK(!RE2::FullMatch("hello", "e"));
--//
--// -----------------------------------------------------------------------
--// UTF-8 AND THE MATCHING INTERFACE:
--//
--// By default, the pattern and input text are interpreted as UTF-8.
--// The RE2::Latin1 option causes them to be interpreted as Latin-1.
--//
--// Example:
--// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
--// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
--//
--// -----------------------------------------------------------------------
--// MATCHING WITH SUB-STRING EXTRACTION:
--//
--// You can supply extra pointer arguments to extract matched subpieces.
--//
--// Example: extracts "ruby" into "s" and 1234 into "i"
--// int i;
--// string s;
--// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
--//
--// Example: fails because string cannot be stored in integer
--// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
--//
--// Example: fails because there aren't enough sub-patterns:
--// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
--//
--// Example: does not try to extract any extra sub-patterns
--// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
--//
--// Example: does not try to extract into NULL
--// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
--//
--// Example: integer overflow causes failure
--// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
--//
--// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
--// This may get a little faster in the future, but right now is slower
--// than PCRE. On the other hand, failed matches run *very* fast (faster
--// than PCRE), as do matches without substring extraction.
--//
--// -----------------------------------------------------------------------
--// PARTIAL MATCHES
--//
--// You can use the "PartialMatch" operation when you want the pattern
--// to match any substring of the text.
--//
--// Example: simple search for a string:
--// CHECK(RE2::PartialMatch("hello", "ell"));
--//
--// Example: find first number in a string
--// int number;
--// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
--// CHECK_EQ(number, 100);
--//
--// -----------------------------------------------------------------------
--// PRE-COMPILED REGULAR EXPRESSIONS
--//
--// RE2 makes it easy to use any string as a regular expression, without
--// requiring a separate compilation step.
--//
--// If speed is of the essence, you can create a pre-compiled "RE2"
--// object from the pattern and use it multiple times. If you do so,
--// you can typically parse text faster than with sscanf.
--//
--// Example: precompile pattern for faster matching:
--// RE2 pattern("h.*o");
--// while (ReadLine(&str)) {
--// if (RE2::FullMatch(str, pattern)) ...;
--// }
--//
--// -----------------------------------------------------------------------
--// SCANNING TEXT INCREMENTALLY
--//
--// The "Consume" operation may be useful if you want to repeatedly
--// match regular expressions at the front of a string and skip over
--// them as they match. This requires use of the "StringPiece" type,
--// which represents a sub-range of a real string.
--//
--// Example: read lines of the form "var = value" from a string.
--// string contents = ...; // Fill string somehow
--// StringPiece input(contents); // Wrap a StringPiece around it
--//
--// string var;
--// int value;
--// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
--// ...;
--// }
--//
--// Each successful call to "Consume" will set "var/value", and also
--// advance "input" so it points past the matched text. Note that if the
--// regular expression matches an empty string, input will advance
--// by 0 bytes. If the regular expression being used might match
--// an empty string, the loop body must check for this case and either
--// advance the string or break out of the loop.
--//
--// The "FindAndConsume" operation is similar to "Consume" but does not
--// anchor your match at the beginning of the string. For example, you
--// could extract all words from a string by repeatedly calling
--// RE2::FindAndConsume(&input, "(\\w+)", &word)
--//
--// -----------------------------------------------------------------------
--// USING VARIABLE NUMBER OF ARGUMENTS
--//
--// The above operations require you to know the number of arguments
--// when you write the code. This is not always possible or easy (for
--// example, the regular expression may be calculated at run time).
--// You can use the "N" version of the operations when the number of
--// match arguments are determined at run time.
--//
--// Example:
--// const RE2::Arg* args[10];
--// int n;
--// // ... populate args with pointers to RE2::Arg values ...
--// // ... set n to the number of RE2::Arg objects ...
--// bool match = RE2::FullMatchN(input, pattern, args, n);
--//
--// The last statement is equivalent to
--//
--// bool match = RE2::FullMatch(input, pattern,
--// *args[0], *args[1], ..., *args[n - 1]);
--//
--// -----------------------------------------------------------------------
--// PARSING HEX/OCTAL/C-RADIX NUMBERS
--//
--// By default, if you pass a pointer to a numeric value, the
--// corresponding text is interpreted as a base-10 number. You can
--// instead wrap the pointer with a call to one of the operators Hex(),
--// Octal(), or CRadix() to interpret the text in another base. The
--// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
--// prefixes, but defaults to base-10.
--//
--// Example:
--// int a, b, c, d;
--// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
--// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d));
--// will leave 64 in a, b, c, and d.
--
--
--#include <stdint.h>
--#include <map>
--#include <string>
--#include "re2/stringpiece.h"
--#include "re2/variadic_function.h"
--
--namespace re2 {
--using std::string;
--using std::map;
--class Mutex;
--class Prog;
--class Regexp;
--
--// Interface for regular expression matching. Also corresponds to a
--// pre-compiled regular expression. An "RE2" object is safe for
--// concurrent use by multiple threads.
--class RE2 {
-- public:
-- // We convert user-passed pointers into special Arg objects
-- class Arg;
-- class Options;
--
-- // Defined in set.h.
-- class Set;
--
-- enum ErrorCode {
-- NoError = 0,
--
-- // Unexpected error
-- ErrorInternal,
--
-- // Parse errors
-- ErrorBadEscape, // bad escape sequence
-- ErrorBadCharClass, // bad character class
-- ErrorBadCharRange, // bad character class range
-- ErrorMissingBracket, // missing closing ]
-- ErrorMissingParen, // missing closing )
-- ErrorTrailingBackslash, // trailing \ at end of regexp
-- ErrorRepeatArgument, // repeat argument missing, e.g. "*"
-- ErrorRepeatSize, // bad repetition argument
-- ErrorRepeatOp, // bad repetition operator
-- ErrorBadPerlOp, // bad perl operator
-- ErrorBadUTF8, // invalid UTF-8 in regexp
-- ErrorBadNamedCapture, // bad named capture group
-- ErrorPatternTooLarge, // pattern too large (compile failed)
-- };
--
-- // Predefined common options.
-- // If you need more complicated things, instantiate
-- // an Option class, change the settings, and pass it to the
-- // RE2 constructor.
-- static const Options DefaultOptions;
-- static const Options Latin1; // treat input as Latin-1 (default UTF-8)
-- static const Options POSIX; // POSIX syntax, leftmost-longest match
-- static const Options Quiet; // do not log about regexp parse errors
--
-- // Need to have the const char* and const string& forms for implicit
-- // conversions when passing string literals to FullMatch and PartialMatch.
-- // Otherwise the StringPiece form would be sufficient.
--#ifndef SWIG
-- RE2(const char* pattern);
-- RE2(const string& pattern);
--#endif
-- RE2(const StringPiece& pattern);
-- RE2(const StringPiece& pattern, const Options& option);
-- ~RE2();
--
-- // Returns whether RE2 was created properly.
-- bool ok() const { return error_code() == NoError; }
--
-- // The string specification for this RE2. E.g.
-- // RE2 re("ab*c?d+");
-- // re.pattern(); // "ab*c?d+"
-- const string& pattern() const { return pattern_; }
--
-- // If RE2 could not be created properly, returns an error string.
-- // Else returns the empty string.
-- const string& error() const { return *error_; }
--
-- // If RE2 could not be created properly, returns an error code.
-- // Else returns RE2::NoError (== 0).
-- ErrorCode error_code() const { return error_code_; }
--
-- // If RE2 could not be created properly, returns the offending
-- // portion of the regexp.
-- const string& error_arg() const { return error_arg_; }
--
-- // Returns the program size, a very approximate measure of a regexp's "cost".
-- // Larger numbers are more expensive than smaller numbers.
-- int ProgramSize() const;
--
-- // Returns the underlying Regexp; not for general use.
-- // Returns entire_regexp_ so that callers don't need
-- // to know about prefix_ and prefix_foldcase_.
-- re2::Regexp* Regexp() const { return entire_regexp_; }
--
-- /***** The useful part: the matching interface *****/
--
-- // Matches "text" against "pattern". If pointer arguments are
-- // supplied, copies matched sub-patterns into them.
-- //
-- // You can pass in a "const char*" or a "string" for "text".
-- // You can pass in a "const char*" or a "string" or a "RE2" for "pattern".
-- //
-- // The provided pointer arguments can be pointers to any scalar numeric
-- // type, or one of:
-- // string (matched piece is copied to string)
-- // StringPiece (StringPiece is mutated to point to matched piece)
-- // T (where "bool T::ParseFrom(const char*, int)" exists)
-- // (void*)NULL (the corresponding matched sub-pattern is not copied)
-- //
-- // Returns true iff all of the following conditions are satisfied:
-- // a. "text" matches "pattern" exactly
-- // b. The number of matched sub-patterns is >= number of supplied pointers
-- // c. The "i"th argument has a suitable type for holding the
-- // string captured as the "i"th sub-pattern. If you pass in
-- // NULL for the "i"th argument, or pass fewer arguments than
-- // number of sub-patterns, "i"th captured sub-pattern is
-- // ignored.
-- //
-- // CAVEAT: An optional sub-pattern that does not exist in the
-- // matched string is assigned the empty string. Therefore, the
-- // following will return false (because the empty string is not a
-- // valid number):
-- // int number;
-- // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
-- static bool FullMatchN(const StringPiece& text, const RE2& re,
-- const Arg* const args[], int argc);
-- static const VariadicFunction2<
-- bool, const StringPiece&, const RE2&, Arg, RE2::FullMatchN> FullMatch;
--
-- // Exactly like FullMatch(), except that "pattern" is allowed to match
-- // a substring of "text".
-- static bool PartialMatchN(const StringPiece& text, const RE2& re, // 3..16 args
-- const Arg* const args[], int argc);
-- static const VariadicFunction2<
-- bool, const StringPiece&, const RE2&, Arg, RE2::PartialMatchN> PartialMatch;
--
-- // Like FullMatch() and PartialMatch(), except that pattern has to
-- // match a prefix of "text", and "input" is advanced past the matched
-- // text. Note: "input" is modified iff this routine returns true.
-- static bool ConsumeN(StringPiece* input, const RE2& pattern, // 3..16 args
-- const Arg* const args[], int argc);
-- static const VariadicFunction2<
-- bool, StringPiece*, const RE2&, Arg, RE2::ConsumeN> Consume;
--
-- // Like Consume(..), but does not anchor the match at the beginning of the
-- // string. That is, "pattern" need not start its match at the beginning of
-- // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
-- // word in "s" and stores it in "word".
-- static bool FindAndConsumeN(StringPiece* input, const RE2& pattern,
-- const Arg* const args[], int argc);
-- static const VariadicFunction2<
-- bool, StringPiece*, const RE2&, Arg, RE2::FindAndConsumeN> FindAndConsume;
--
-- // Replace the first match of "pattern" in "str" with "rewrite".
-- // Within "rewrite", backslash-escaped digits (\1 to \9) can be
-- // used to insert text matching corresponding parenthesized group
-- // from the pattern. \0 in "rewrite" refers to the entire matching
-- // text. E.g.,
-- //
-- // string s = "yabba dabba doo";
-- // CHECK(RE2::Replace(&s, "b+", "d"));
-- //
-- // will leave "s" containing "yada dabba doo"
-- //
-- // Returns true if the pattern matches and a replacement occurs,
-- // false otherwise.
-- static bool Replace(string *str,
-- const RE2& pattern,
-- const StringPiece& rewrite);
--
-- // Like Replace(), except replaces successive non-overlapping occurrences
-- // of the pattern in the string with the rewrite. E.g.
-- //
-- // string s = "yabba dabba doo";
-- // CHECK(RE2::GlobalReplace(&s, "b+", "d"));
-- //
-- // will leave "s" containing "yada dada doo"
-- // Replacements are not subject to re-matching.
-- //
-- // Because GlobalReplace only replaces non-overlapping matches,
-- // replacing "ana" within "banana" makes only one replacement, not two.
-- //
-- // Returns the number of replacements made.
-- static int GlobalReplace(string *str,
-- const RE2& pattern,
-- const StringPiece& rewrite);
--
-- // Like Replace, except that if the pattern matches, "rewrite"
-- // is copied into "out" with substitutions. The non-matching
-- // portions of "text" are ignored.
-- //
-- // Returns true iff a match occurred and the extraction happened
-- // successfully; if no match occurs, the string is left unaffected.
-- static bool Extract(const StringPiece &text,
-- const RE2& pattern,
-- const StringPiece &rewrite,
-- string *out);
--
-- // Escapes all potentially meaningful regexp characters in
-- // 'unquoted'. The returned string, used as a regular expression,
-- // will exactly match the original string. For example,
-- // 1.5-2.0?
-- // may become:
-- // 1\.5\-2\.0\?
-- static string QuoteMeta(const StringPiece& unquoted);
--
-- // Computes range for any strings matching regexp. The min and max can in
-- // some cases be arbitrarily precise, so the caller gets to specify the
-- // maximum desired length of string returned.
-- //
-- // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
-- // string s that is an anchored match for this regexp satisfies
-- // min <= s && s <= max.
-- //
-- // Note that PossibleMatchRange() will only consider the first copy of an
-- // infinitely repeated element (i.e., any regexp element followed by a '*' or
-- // '+' operator). Regexps with "{N}" constructions are not affected, as those
-- // do not compile down to infinite repetitions.
-- //
-- // Returns true on success, false on error.
-- bool PossibleMatchRange(string* min, string* max, int maxlen) const;
--
-- // Generic matching interface
--
-- // Type of match.
-- enum Anchor {
-- UNANCHORED, // No anchoring
-- ANCHOR_START, // Anchor at start only
-- ANCHOR_BOTH, // Anchor at start and end
-- };
--
-- // Return the number of capturing subpatterns, or -1 if the
-- // regexp wasn't valid on construction. The overall match ($0)
-- // does not count: if the regexp is "(a)(b)", returns 2.
-- int NumberOfCapturingGroups() const;
--
--
-- // Return a map from names to capturing indices.
-- // The map records the index of the leftmost group
-- // with the given name.
-- // Only valid until the re is deleted.
-- const map<string, int>& NamedCapturingGroups() const;
--
-- // Return a map from capturing indices to names.
-- // The map has no entries for unnamed groups.
-- // Only valid until the re is deleted.
-- const map<int, string>& CapturingGroupNames() const;
--
-- // General matching routine.
-- // Match against text starting at offset startpos
-- // and stopping the search at offset endpos.
-- // Returns true if match found, false if not.
-- // On a successful match, fills in match[] (up to nmatch entries)
-- // with information about submatches.
-- // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true,
-- // setting match[0] = "barbaz", match[1] = NULL, match[2] = "bar",
-- // match[3] = NULL, ..., up to match[nmatch-1] = NULL.
-- //
-- // Don't ask for more match information than you will use:
-- // runs much faster with nmatch == 1 than nmatch > 1, and
-- // runs even faster if nmatch == 0.
-- // Doesn't make sense to use nmatch > 1 + NumberOfCapturingGroups(),
-- // but will be handled correctly.
-- //
-- // Passing text == StringPiece(NULL, 0) will be handled like any other
-- // empty string, but note that on return, it will not be possible to tell
-- // whether submatch i matched the empty string or did not match:
-- // either way, match[i] == NULL.
-- bool Match(const StringPiece& text,
-- int startpos,
-- int endpos,
-- Anchor anchor,
-- StringPiece *match,
-- int nmatch) const;
--
-- // Check that the given rewrite string is suitable for use with this
-- // regular expression. It checks that:
-- // * The regular expression has enough parenthesized subexpressions
-- // to satisfy all of the \N tokens in rewrite
-- // * The rewrite string doesn't have any syntax errors. E.g.,
-- // '\' followed by anything other than a digit or '\'.
-- // A true return value guarantees that Replace() and Extract() won't
-- // fail because of a bad rewrite string.
-- bool CheckRewriteString(const StringPiece& rewrite, string* error) const;
--
-- // Constructor options
-- class Options {
-- public:
-- // The options are (defaults in parentheses):
-- //
-- // utf8 (true) text and pattern are UTF-8; otherwise Latin-1
-- // posix_syntax (false) restrict regexps to POSIX egrep syntax
-- // longest_match (false) search for longest match, not first match
-- // log_errors (true) log syntax and execution errors to ERROR
-- // max_mem (see below) approx. max memory footprint of RE2
-- // literal (false) interpret string as literal, not regexp
-- // never_nl (false) never match \n, even if it is in regexp
-- // case_sensitive (true) match is case-sensitive (regexp can override
-- // with (?i) unless in posix_syntax mode)
-- //
-- // The following options are only consulted when posix_syntax == true.
-- // (When posix_syntax == false these features are always enabled and
-- // cannot be turned off.)
-- // perl_classes (false) allow Perl's \d \s \w \D \S \W
-- // word_boundary (false) allow Perl's \b \B (word boundary and not)
-- // one_line (false) ^ and $ only match beginning and end of text
-- //
-- // The max_mem option controls how much memory can be used
-- // to hold the compiled form of the regexp (the Prog) and
-- // its cached DFA graphs. Code Search placed limits on the number
-- // of Prog instructions and DFA states: 10,000 for both.
-- // In RE2, those limits would translate to about 240 KB per Prog
-- // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a
-- // better job of keeping them small than Code Search did).
-- // Each RE2 has two Progs (one forward, one reverse), and each Prog
-- // can have two DFAs (one first match, one longest match).
-- // That makes 4 DFAs:
-- //
-- // forward, first-match - used for UNANCHORED or ANCHOR_LEFT searches
-- // if opt.longest_match() == false
-- // forward, longest-match - used for all ANCHOR_BOTH searches,
-- // and the other two kinds if
-- // opt.longest_match() == true
-- // reverse, first-match - never used
-- // reverse, longest-match - used as second phase for unanchored searches
-- //
-- // The RE2 memory budget is statically divided between the two
-- // Progs and then the DFAs: two thirds to the forward Prog
-- // and one third to the reverse Prog. The forward Prog gives half
-- // of what it has left over to each of its DFAs. The reverse Prog
-- // gives it all to its longest-match DFA.
-- //
-- // Once a DFA fills its budget, it flushes its cache and starts over.
-- // If this happens too often, RE2 falls back on the NFA implementation.
--
-- // For now, make the default budget something close to Code Search.
-- static const int kDefaultMaxMem = 8<<20;
--
-- enum Encoding {
-- EncodingUTF8 = 1,
-- EncodingLatin1
-- };
--
-- Options() :
-- encoding_(EncodingUTF8),
-- posix_syntax_(false),
-- longest_match_(false),
-- log_errors_(true),
-- max_mem_(kDefaultMaxMem),
-- literal_(false),
-- never_nl_(false),
-- case_sensitive_(true),
-- perl_classes_(false),
-- word_boundary_(false),
-- one_line_(false) {
-- }
--
-- Encoding encoding() const { return encoding_; }
-- void set_encoding(Encoding encoding) { encoding_ = encoding; }
--
-- // Legacy interface to encoding.
-- // TODO(rsc): Remove once clients have been converted.
-- bool utf8() const { return encoding_ == EncodingUTF8; }
-- void set_utf8(bool b) {
-- if (b) {
-- encoding_ = EncodingUTF8;
-- } else {
-- encoding_ = EncodingLatin1;
-- }
-- }
--
-- bool posix_syntax() const { return posix_syntax_; }
-- void set_posix_syntax(bool b) { posix_syntax_ = b; }
--
-- bool longest_match() const { return longest_match_; }
-- void set_longest_match(bool b) { longest_match_ = b; }
--
-- bool log_errors() const { return log_errors_; }
-- void set_log_errors(bool b) { log_errors_ = b; }
--
-- int max_mem() const { return max_mem_; }
-- void set_max_mem(int m) { max_mem_ = m; }
--
-- bool literal() const { return literal_; }
-- void set_literal(bool b) { literal_ = b; }
--
-- bool never_nl() const { return never_nl_; }
-- void set_never_nl(bool b) { never_nl_ = b; }
--
-- bool case_sensitive() const { return case_sensitive_; }
-- void set_case_sensitive(bool b) { case_sensitive_ = b; }
--
-- bool perl_classes() const { return perl_classes_; }
-- void set_perl_classes(bool b) { perl_classes_ = b; }
--
-- bool word_boundary() const { return word_boundary_; }
-- void set_word_boundary(bool b) { word_boundary_ = b; }
--
-- bool one_line() const { return one_line_; }
-- void set_one_line(bool b) { one_line_ = b; }
--
-- void Copy(const Options& src) {
-- encoding_ = src.encoding_;
-- posix_syntax_ = src.posix_syntax_;
-- longest_match_ = src.longest_match_;
-- log_errors_ = src.log_errors_;
-- max_mem_ = src.max_mem_;
-- literal_ = src.literal_;
-- never_nl_ = src.never_nl_;
-- case_sensitive_ = src.case_sensitive_;
-- perl_classes_ = src.perl_classes_;
-- word_boundary_ = src.word_boundary_;
-- one_line_ = src.one_line_;
-- }
--
-- int ParseFlags() const;
--
-- private:
-- // Private constructor for defining constants like RE2::Latin1.
-- friend class RE2;
-- Options(Encoding encoding,
-- bool posix_syntax,
-- bool longest_match,
-- bool log_errors) :
-- encoding_(encoding),
-- posix_syntax_(posix_syntax),
-- longest_match_(longest_match),
-- log_errors_(log_errors),
-- max_mem_(kDefaultMaxMem),
-- literal_(false),
-- never_nl_(false),
-- case_sensitive_(true),
-- perl_classes_(false),
-- word_boundary_(false),
-- one_line_(false) {
-- }
--
-- Encoding encoding_;
-- bool posix_syntax_;
-- bool longest_match_;
-- bool log_errors_;
-- int64_t max_mem_;
-- bool literal_;
-- bool never_nl_;
-- bool case_sensitive_;
-- bool perl_classes_;
-- bool word_boundary_;
-- bool one_line_;
--
-- //DISALLOW_EVIL_CONSTRUCTORS(Options);
-- Options(const Options&);
-- void operator=(const Options&);
-- };
--
-- // Returns the options set in the constructor.
-- const Options& options() const { return options_; };
--
-- // Argument converters; see below.
-- static inline Arg CRadix(short* x);
-- static inline Arg CRadix(unsigned short* x);
-- static inline Arg CRadix(int* x);
-- static inline Arg CRadix(unsigned int* x);
-- static inline Arg CRadix(long* x);
-- static inline Arg CRadix(unsigned long* x);
-- static inline Arg CRadix(long long* x);
-- static inline Arg CRadix(unsigned long long* x);
--
-- static inline Arg Hex(short* x);
-- static inline Arg Hex(unsigned short* x);
-- static inline Arg Hex(int* x);
-- static inline Arg Hex(unsigned int* x);
-- static inline Arg Hex(long* x);
-- static inline Arg Hex(unsigned long* x);
-- static inline Arg Hex(long long* x);
-- static inline Arg Hex(unsigned long long* x);
--
-- static inline Arg Octal(short* x);
-- static inline Arg Octal(unsigned short* x);
-- static inline Arg Octal(int* x);
-- static inline Arg Octal(unsigned int* x);
-- static inline Arg Octal(long* x);
-- static inline Arg Octal(unsigned long* x);
-- static inline Arg Octal(long long* x);
-- static inline Arg Octal(unsigned long long* x);
--
-- private:
-- void Init(const StringPiece& pattern, const Options& options);
--
-- bool Rewrite(string *out,
-- const StringPiece &rewrite,
-- const StringPiece* vec,
-- int veclen) const;
--
-- bool DoMatch(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const args[],
-- int n) const;
--
-- re2::Prog* ReverseProg() const;
--
-- mutable Mutex* mutex_;
-- string pattern_; // string regular expression
-- Options options_; // option flags
-- string prefix_; // required prefix (before regexp_)
-- bool prefix_foldcase_; // prefix is ASCII case-insensitive
-- re2::Regexp* entire_regexp_; // parsed regular expression
-- re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed
-- re2::Prog* prog_; // compiled program for regexp
-- mutable re2::Prog* rprog_; // reverse program for regexp
-- bool is_one_pass_; // can use prog_->SearchOnePass?
-- mutable const string* error_; // Error indicator
-- // (or points to empty string)
-- mutable ErrorCode error_code_; // Error code
-- mutable string error_arg_; // Fragment of regexp showing error
-- mutable int num_captures_; // Number of capturing groups
--
-- // Map from capture names to indices
-- mutable const map<string, int>* named_groups_;
--
-- // Map from capture indices to names
-- mutable const map<int, string>* group_names_;
--
-- //DISALLOW_EVIL_CONSTRUCTORS(RE2);
-- RE2(const RE2&);
-- void operator=(const RE2&);
--};
--
--/***** Implementation details *****/
--
--// Hex/Octal/Binary?
--
--// Special class for parsing into objects that define a ParseFrom() method
--template <class T>
--class _RE2_MatchObject {
-- public:
-- static inline bool Parse(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- T* object = reinterpret_cast<T*>(dest);
-- return object->ParseFrom(str, n);
-- }
--};
--
--class RE2::Arg {
-- public:
-- // Empty constructor so we can declare arrays of RE2::Arg
-- Arg();
--
-- // Constructor specially designed for NULL arguments
-- Arg(void*);
--
-- typedef bool (*Parser)(const char* str, int n, void* dest);
--
--// Type-specific parsers
--#define MAKE_PARSER(type,name) \
-- Arg(type* p) : arg_(p), parser_(name) { } \
-- Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } \
--
--
-- MAKE_PARSER(char, parse_char);
-- MAKE_PARSER(signed char, parse_char);
-- MAKE_PARSER(unsigned char, parse_uchar);
-- MAKE_PARSER(short, parse_short);
-- MAKE_PARSER(unsigned short, parse_ushort);
-- MAKE_PARSER(int, parse_int);
-- MAKE_PARSER(unsigned int, parse_uint);
-- MAKE_PARSER(long, parse_long);
-- MAKE_PARSER(unsigned long, parse_ulong);
-- MAKE_PARSER(long long, parse_longlong);
-- MAKE_PARSER(unsigned long long, parse_ulonglong);
-- MAKE_PARSER(float, parse_float);
-- MAKE_PARSER(double, parse_double);
-- MAKE_PARSER(string, parse_string);
-- MAKE_PARSER(StringPiece, parse_stringpiece);
--
--#undef MAKE_PARSER
--
-- // Generic constructor
-- template <class T> Arg(T*, Parser parser);
-- // Generic constructor template
-- template <class T> Arg(T* p)
-- : arg_(p), parser_(_RE2_MatchObject<T>::Parse) {
-- }
--
-- // Parse the data
-- bool Parse(const char* str, int n) const;
--
-- private:
-- void* arg_;
-- Parser parser_;
--
-- static bool parse_null (const char* str, int n, void* dest);
-- static bool parse_char (const char* str, int n, void* dest);
-- static bool parse_uchar (const char* str, int n, void* dest);
-- static bool parse_float (const char* str, int n, void* dest);
-- static bool parse_double (const char* str, int n, void* dest);
-- static bool parse_string (const char* str, int n, void* dest);
-- static bool parse_stringpiece (const char* str, int n, void* dest);
--
--#define DECLARE_INTEGER_PARSER(name) \
-- private: \
-- static bool parse_ ## name(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _radix( \
-- const char* str, int n, void* dest, int radix); \
-- public: \
-- static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
--
-- DECLARE_INTEGER_PARSER(short);
-- DECLARE_INTEGER_PARSER(ushort);
-- DECLARE_INTEGER_PARSER(int);
-- DECLARE_INTEGER_PARSER(uint);
-- DECLARE_INTEGER_PARSER(long);
-- DECLARE_INTEGER_PARSER(ulong);
-- DECLARE_INTEGER_PARSER(longlong);
-- DECLARE_INTEGER_PARSER(ulonglong);
--
--#undef DECLARE_INTEGER_PARSER
--};
--
--inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
--inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
--
--inline bool RE2::Arg::Parse(const char* str, int n) const {
-- return (*parser_)(str, n, arg_);
--}
--
--// This part of the parser, appropriate only for ints, deals with bases
--#define MAKE_INTEGER_PARSER(type, name) \
-- inline RE2::Arg RE2::Hex(type* ptr) { \
-- return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _hex); } \
-- inline RE2::Arg RE2::Octal(type* ptr) { \
-- return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _octal); } \
-- inline RE2::Arg RE2::CRadix(type* ptr) { \
-- return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _cradix); }
--
--MAKE_INTEGER_PARSER(short, short);
--MAKE_INTEGER_PARSER(unsigned short, ushort);
--MAKE_INTEGER_PARSER(int, int);
--MAKE_INTEGER_PARSER(unsigned int, uint);
--MAKE_INTEGER_PARSER(long, long);
--MAKE_INTEGER_PARSER(unsigned long, ulong);
--MAKE_INTEGER_PARSER(long long, longlong);
--MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
--
--#undef MAKE_INTEGER_PARSER
--
--} // namespace re2
--
--using re2::RE2;
--
--#endif /* RE2_RE2_H */
-diff --git a/re2/re2/regexp.cc b/re2/re2/regexp.cc
-deleted file mode 100644
-index 9486b3c..0000000
---- a/re2/re2/regexp.cc
-+++ /dev/null
-@@ -1,920 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression representation.
--// Tested by parse_test.cc
--
--#include "util/util.h"
--#include "re2/regexp.h"
--#include "re2/stringpiece.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--// Constructor. Allocates vectors as appropriate for operator.
--Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
-- : op_(op),
-- simple_(false),
-- parse_flags_(static_cast<uint16>(parse_flags)),
-- ref_(1),
-- nsub_(0),
-- down_(NULL) {
-- subone_ = NULL;
-- memset(the_union_, 0, sizeof the_union_);
--}
--
--// Destructor. Assumes already cleaned up children.
--// Private: use Decref() instead of delete to destroy Regexps.
--// Can't call Decref on the sub-Regexps here because
--// that could cause arbitrarily deep recursion, so
--// required Decref() to have handled them for us.
--Regexp::~Regexp() {
-- if (nsub_ > 0)
-- LOG(DFATAL) << "Regexp not destroyed.";
--
-- switch (op_) {
-- default:
-- break;
-- case kRegexpCapture:
-- delete name_;
-- break;
-- case kRegexpLiteralString:
-- delete[] runes_;
-- break;
-- case kRegexpCharClass:
-- cc_->Delete();
-- delete ccb_;
-- break;
-- }
--}
--
--// If it's possible to destroy this regexp without recurring,
--// do so and return true. Else return false.
--bool Regexp::QuickDestroy() {
-- if (nsub_ == 0) {
-- delete this;
-- return true;
-- }
-- return false;
--}
--
--static map<Regexp*, int> ref_map;
--static Mutex ref_mutex;
--
--int Regexp::Ref() {
-- if (ref_ < kMaxRef)
-- return ref_;
--
-- MutexLock l(&ref_mutex);
-- return ref_map[this];
--}
--
--// Increments reference count, returns object as convenience.
--Regexp* Regexp::Incref() {
-- if (ref_ >= kMaxRef-1) {
-- // Store ref count in overflow map.
-- MutexLock l(&ref_mutex);
-- if (ref_ == kMaxRef) { // already overflowed
-- ref_map[this]++;
-- return this;
-- }
-- // overflowing now
-- ref_map[this] = kMaxRef;
-- ref_ = kMaxRef;
-- return this;
-- }
--
-- ref_++;
-- return this;
--}
--
--// Decrements reference count and deletes this object if count reaches 0.
--void Regexp::Decref() {
-- if (ref_ == kMaxRef) {
-- // Ref count is stored in overflow map.
-- MutexLock l(&ref_mutex);
-- int r = ref_map[this] - 1;
-- if (r < kMaxRef) {
-- ref_ = r;
-- ref_map.erase(this);
-- } else {
-- ref_map[this] = r;
-- }
-- return;
-- }
-- ref_--;
-- if (ref_ == 0)
-- Destroy();
--}
--
--// Deletes this object; ref count has count reached 0.
--void Regexp::Destroy() {
-- if (QuickDestroy())
-- return;
--
-- // Handle recursive Destroy with explicit stack
-- // to avoid arbitrarily deep recursion on process stack [sigh].
-- down_ = NULL;
-- Regexp* stack = this;
-- while (stack != NULL) {
-- Regexp* re = stack;
-- stack = re->down_;
-- if (re->ref_ != 0)
-- LOG(DFATAL) << "Bad reference count " << re->ref_;
-- if (re->nsub_ > 0) {
-- Regexp** subs = re->sub();
-- for (int i = 0; i < re->nsub_; i++) {
-- Regexp* sub = subs[i];
-- if (sub == NULL)
-- continue;
-- if (sub->ref_ == kMaxRef)
-- sub->Decref();
-- else
-- --sub->ref_;
-- if (sub->ref_ == 0 && !sub->QuickDestroy()) {
-- sub->down_ = stack;
-- stack = sub;
-- }
-- }
-- if (re->nsub_ > 1)
-- delete[] subs;
-- re->nsub_ = 0;
-- }
-- delete re;
-- }
--}
--
--void Regexp::AddRuneToString(Rune r) {
-- DCHECK(op_ == kRegexpLiteralString);
-- if (nrunes_ == 0) {
-- // start with 8
-- runes_ = new Rune[8];
-- } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) {
-- // double on powers of two
-- Rune *old = runes_;
-- runes_ = new Rune[nrunes_ * 2];
-- for (int i = 0; i < nrunes_; i++)
-- runes_[i] = old[i];
-- delete[] old;
-- }
--
-- runes_[nrunes_++] = r;
--}
--
--Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {
-- Regexp* re = new Regexp(kRegexpHaveMatch, flags);
-- re->match_id_ = match_id;
-- return re;
--}
--
--Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
-- if (sub->op() == kRegexpPlus && sub->parse_flags() == flags)
-- return sub;
-- Regexp* re = new Regexp(kRegexpPlus, flags);
-- re->AllocSub(1);
-- re->sub()[0] = sub;
-- return re;
--}
--
--Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
-- if (sub->op() == kRegexpStar && sub->parse_flags() == flags)
-- return sub;
-- Regexp* re = new Regexp(kRegexpStar, flags);
-- re->AllocSub(1);
-- re->sub()[0] = sub;
-- return re;
--}
--
--Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
-- if (sub->op() == kRegexpQuest && sub->parse_flags() == flags)
-- return sub;
-- Regexp* re = new Regexp(kRegexpQuest, flags);
-- re->AllocSub(1);
-- re->sub()[0] = sub;
-- return re;
--}
--
--Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
-- ParseFlags flags, bool can_factor) {
-- if (nsub == 1)
-- return sub[0];
--
-- Regexp** subcopy = NULL;
-- if (op == kRegexpAlternate && can_factor) {
-- // Going to edit sub; make a copy so we don't step on caller.
-- subcopy = new Regexp*[nsub];
-- memmove(subcopy, sub, nsub * sizeof sub[0]);
-- sub = subcopy;
-- nsub = FactorAlternation(sub, nsub, flags);
-- if (nsub == 1) {
-- Regexp* re = sub[0];
-- delete[] subcopy;
-- return re;
-- }
-- }
--
-- if (nsub > kMaxNsub) {
-- // Too many subexpressions to fit in a single Regexp.
-- // Make a two-level tree. Two levels gets us to 65535^2.
-- int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub;
-- Regexp* re = new Regexp(op, flags);
-- re->AllocSub(nbigsub);
-- Regexp** subs = re->sub();
-- for (int i = 0; i < nbigsub - 1; i++)
-- subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false);
-- subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
-- nsub - (nbigsub-1)*kMaxNsub, flags,
-- false);
-- delete[] subcopy;
-- return re;
-- }
--
-- Regexp* re = new Regexp(op, flags);
-- re->AllocSub(nsub);
-- Regexp** subs = re->sub();
-- for (int i = 0; i < nsub; i++)
-- subs[i] = sub[i];
--
-- delete[] subcopy;
-- return re;
--}
--
--Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) {
-- return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false);
--}
--
--Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) {
-- return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true);
--}
--
--Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) {
-- return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false);
--}
--
--Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) {
-- Regexp* re = new Regexp(kRegexpCapture, flags);
-- re->AllocSub(1);
-- re->sub()[0] = sub;
-- re->cap_ = cap;
-- return re;
--}
--
--Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) {
-- Regexp* re = new Regexp(kRegexpRepeat, flags);
-- re->AllocSub(1);
-- re->sub()[0] = sub;
-- re->min_ = min;
-- re->max_ = max;
-- return re;
--}
--
--Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) {
-- Regexp* re = new Regexp(kRegexpLiteral, flags);
-- re->rune_ = rune;
-- return re;
--}
--
--Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) {
-- if (nrunes <= 0)
-- return new Regexp(kRegexpEmptyMatch, flags);
-- if (nrunes == 1)
-- return NewLiteral(runes[0], flags);
-- Regexp* re = new Regexp(kRegexpLiteralString, flags);
-- for (int i = 0; i < nrunes; i++)
-- re->AddRuneToString(runes[i]);
-- return re;
--}
--
--Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {
-- Regexp* re = new Regexp(kRegexpCharClass, flags);
-- re->cc_ = cc;
-- return re;
--}
--
--// Swaps this and that in place.
--void Regexp::Swap(Regexp* that) {
-- // Can use memmove because Regexp is just a struct (no vtable).
-- char tmp[sizeof *this];
-- memmove(tmp, this, sizeof tmp);
-- memmove(this, that, sizeof tmp);
-- memmove(that, tmp, sizeof tmp);
--}
--
--// Tests equality of all top-level structure but not subregexps.
--static bool TopEqual(Regexp* a, Regexp* b) {
-- if (a->op() != b->op())
-- return false;
--
-- switch (a->op()) {
-- case kRegexpNoMatch:
-- case kRegexpEmptyMatch:
-- case kRegexpAnyChar:
-- case kRegexpAnyByte:
-- case kRegexpBeginLine:
-- case kRegexpEndLine:
-- case kRegexpWordBoundary:
-- case kRegexpNoWordBoundary:
-- case kRegexpBeginText:
-- return true;
--
-- case kRegexpEndText:
-- // The parse flags remember whether it's \z or (?-m:$),
-- // which matters when testing against PCRE.
-- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0;
--
-- case kRegexpLiteral:
-- return a->rune() == b->rune() &&
-- ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0;
--
-- case kRegexpLiteralString:
-- return a->nrunes() == b->nrunes() &&
-- ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 &&
-- memcmp(a->runes(), b->runes(),
-- a->nrunes() * sizeof a->runes()[0]) == 0;
--
-- case kRegexpAlternate:
-- case kRegexpConcat:
-- return a->nsub() == b->nsub();
--
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0;
--
-- case kRegexpRepeat:
-- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 &&
-- a->min() == b->min() &&
-- a->max() == b->max();
--
-- case kRegexpCapture:
-- return a->cap() == b->cap() && a->name() == b->name();
--
-- case kRegexpHaveMatch:
-- return a->match_id() == b->match_id();
--
-- case kRegexpCharClass: {
-- CharClass* acc = a->cc();
-- CharClass* bcc = b->cc();
-- return acc->size() == bcc->size() &&
-- acc->end() - acc->begin() == bcc->end() - bcc->begin() &&
-- memcmp(acc->begin(), bcc->begin(),
-- (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0;
-- }
-- }
--
-- LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
-- return 0;
--}
--
--bool Regexp::Equal(Regexp* a, Regexp* b) {
-- if (a == NULL || b == NULL)
-- return a == b;
--
-- if (!TopEqual(a, b))
-- return false;
--
-- // Fast path:
-- // return without allocating vector if there are no subregexps.
-- switch (a->op()) {
-- case kRegexpAlternate:
-- case kRegexpConcat:
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- case kRegexpRepeat:
-- case kRegexpCapture:
-- break;
--
-- default:
-- return true;
-- }
--
-- // Committed to doing real work.
-- // The stack (vector) has pairs of regexps waiting to
-- // be compared. The regexps are only equal if
-- // all the pairs end up being equal.
-- vector<Regexp*> stk;
--
-- for (;;) {
-- // Invariant: TopEqual(a, b) == true.
-- Regexp* a2;
-- Regexp* b2;
-- switch (a->op()) {
-- default:
-- break;
-- case kRegexpAlternate:
-- case kRegexpConcat:
-- for (int i = 0; i < a->nsub(); i++) {
-- a2 = a->sub()[i];
-- b2 = b->sub()[i];
-- if (!TopEqual(a2, b2))
-- return false;
-- stk.push_back(a2);
-- stk.push_back(b2);
-- }
-- break;
--
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- case kRegexpRepeat:
-- case kRegexpCapture:
-- a2 = a->sub()[0];
-- b2 = b->sub()[0];
-- if (!TopEqual(a2, b2))
-- return false;
-- // Really:
-- // stk.push_back(a2);
-- // stk.push_back(b2);
-- // break;
-- // but faster to assign directly and loop.
-- a = a2;
-- b = b2;
-- continue;
-- }
--
-- int n = stk.size();
-- if (n == 0)
-- break;
--
-- a = stk[n-2];
-- b = stk[n-1];
-- stk.resize(n-2);
-- }
--
-- return true;
--}
--
--// Keep in sync with enum RegexpStatusCode in regexp.h
--static const string kErrorStrings[] = {
-- "no error",
-- "unexpected error",
-- "invalid escape sequence",
-- "invalid character class",
-- "invalid character class range",
-- "missing ]",
-- "missing )",
-- "trailing \\",
-- "no argument for repetition operator",
-- "invalid repetition size",
-- "bad repetition operator",
-- "invalid perl operator",
-- "invalid UTF-8",
-- "invalid named capture group",
--};
--
--const string& RegexpStatus::CodeText(enum RegexpStatusCode code) {
-- if (code < 0 || code >= arraysize(kErrorStrings))
-- code = kRegexpInternalError;
-- return kErrorStrings[code];
--}
--
--string RegexpStatus::Text() const {
-- if (error_arg_.empty())
-- return CodeText(code_);
-- string s;
-- s.append(CodeText(code_));
-- s.append(": ");
-- s.append(error_arg_.data(), error_arg_.size());
-- return s;
--}
--
--void RegexpStatus::Copy(const RegexpStatus& status) {
-- code_ = status.code_;
-- error_arg_ = status.error_arg_;
--}
--
--typedef int Ignored; // Walker<void> doesn't exist
--
--// Walker subclass to count capturing parens in regexp.
--class NumCapturesWalker : public Regexp::Walker<Ignored> {
-- public:
-- NumCapturesWalker() : ncapture_(0) {}
-- int ncapture() { return ncapture_; }
--
-- virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
-- if (re->op() == kRegexpCapture)
-- ncapture_++;
-- return ignored;
-- }
-- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
-- return ignored;
-- }
--
-- private:
-- int ncapture_;
-- DISALLOW_EVIL_CONSTRUCTORS(NumCapturesWalker);
--};
--
--int Regexp::NumCaptures() {
-- NumCapturesWalker w;
-- w.Walk(this, 0);
-- return w.ncapture();
--}
--
--// Walker class to build map of named capture groups and their indices.
--class NamedCapturesWalker : public Regexp::Walker<Ignored> {
-- public:
-- NamedCapturesWalker() : map_(NULL) {}
-- ~NamedCapturesWalker() { delete map_; }
--
-- map<string, int>* TakeMap() {
-- map<string, int>* m = map_;
-- map_ = NULL;
-- return m;
-- }
--
-- Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
-- if (re->op() == kRegexpCapture && re->name() != NULL) {
-- // Allocate map once we find a name.
-- if (map_ == NULL)
-- map_ = new map<string, int>;
--
-- // Record first occurrence of each name.
-- // (The rule is that if you have the same name
-- // multiple times, only the leftmost one counts.)
-- if (map_->find(*re->name()) == map_->end())
-- (*map_)[*re->name()] = re->cap();
-- }
-- return ignored;
-- }
--
-- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
-- return ignored;
-- }
--
-- private:
-- map<string, int>* map_;
-- DISALLOW_EVIL_CONSTRUCTORS(NamedCapturesWalker);
--};
--
--map<string, int>* Regexp::NamedCaptures() {
-- NamedCapturesWalker w;
-- w.Walk(this, 0);
-- return w.TakeMap();
--}
--
--// Walker class to build map from capture group indices to their names.
--class CaptureNamesWalker : public Regexp::Walker<Ignored> {
-- public:
-- CaptureNamesWalker() : map_(NULL) {}
-- ~CaptureNamesWalker() { delete map_; }
--
-- map<int, string>* TakeMap() {
-- map<int, string>* m = map_;
-- map_ = NULL;
-- return m;
-- }
--
-- Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
-- if (re->op() == kRegexpCapture && re->name() != NULL) {
-- // Allocate map once we find a name.
-- if (map_ == NULL)
-- map_ = new map<int, string>;
--
-- (*map_)[re->cap()] = *re->name();
-- }
-- return ignored;
-- }
--
-- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
-- return ignored;
-- }
--
-- private:
-- map<int, string>* map_;
-- DISALLOW_EVIL_CONSTRUCTORS(CaptureNamesWalker);
--};
--
--map<int, string>* Regexp::CaptureNames() {
-- CaptureNamesWalker w;
-- w.Walk(this, 0);
-- return w.TakeMap();
--}
--
--// Determines whether regexp matches must be anchored
--// with a fixed string prefix. If so, returns the prefix and
--// the regexp that remains after the prefix. The prefix might
--// be ASCII case-insensitive.
--bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
-- // No need for a walker: the regexp must be of the form
-- // 1. some number of ^ anchors
-- // 2. a literal char or string
-- // 3. the rest
-- prefix->clear();
-- *foldcase = false;
-- *suffix = NULL;
-- if (op_ != kRegexpConcat)
-- return false;
--
-- // Some number of anchors, then a literal or concatenation.
-- int i = 0;
-- Regexp** sub = this->sub();
-- while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
-- i++;
-- if (i == 0 || i >= nsub_)
-- return false;
--
-- Regexp* re = sub[i];
-- switch (re->op_) {
-- default:
-- return false;
--
-- case kRegexpLiteralString:
-- // Convert to string in proper encoding.
-- if (re->parse_flags() & Latin1) {
-- prefix->resize(re->nrunes_);
-- for (int j = 0; j < re->nrunes_; j++)
-- (*prefix)[j] = re->runes_[j];
-- } else {
-- // Convert to UTF-8 in place.
-- // Assume worst-case space and then trim.
-- prefix->resize(re->nrunes_ * UTFmax);
-- char *p = &(*prefix)[0];
-- for (int j = 0; j < re->nrunes_; j++) {
-- Rune r = re->runes_[j];
-- if (r < Runeself)
-- *p++ = r;
-- else
-- p += runetochar(p, &r);
-- }
-- prefix->resize(p - &(*prefix)[0]);
-- }
-- break;
--
-- case kRegexpLiteral:
-- if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
-- prefix->append(1, re->rune_);
-- } else {
-- char buf[UTFmax];
-- prefix->append(buf, runetochar(buf, &re->rune_));
-- }
-- break;
-- }
-- *foldcase = (sub[i]->parse_flags() & FoldCase);
-- i++;
--
-- // The rest.
-- if (i < nsub_) {
-- for (int j = i; j < nsub_; j++)
-- sub[j]->Incref();
-- re = Concat(sub + i, nsub_ - i, parse_flags());
-- } else {
-- re = new Regexp(kRegexpEmptyMatch, parse_flags());
-- }
-- *suffix = re;
-- return true;
--}
--
--// Character class builder is a balanced binary tree (STL set)
--// containing non-overlapping, non-abutting RuneRanges.
--// The less-than operator used in the tree treats two
--// ranges as equal if they overlap at all, so that
--// lookups for a particular Rune are possible.
--
--CharClassBuilder::CharClassBuilder() {
-- nrunes_ = 0;
-- upper_ = 0;
-- lower_ = 0;
--}
--
--// Add lo-hi to the class; return whether class got bigger.
--bool CharClassBuilder::AddRange(Rune lo, Rune hi) {
-- if (hi < lo)
-- return false;
--
-- if (lo <= 'z' && hi >= 'A') {
-- // Overlaps some alpha, maybe not all.
-- // Update bitmaps telling which ASCII letters are in the set.
-- Rune lo1 = max<Rune>(lo, 'A');
-- Rune hi1 = min<Rune>(hi, 'Z');
-- if (lo1 <= hi1)
-- upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A');
--
-- lo1 = max<Rune>(lo, 'a');
-- hi1 = min<Rune>(hi, 'z');
-- if (lo1 <= hi1)
-- lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a');
-- }
--
-- { // Check whether lo, hi is already in the class.
-- iterator it = ranges_.find(RuneRange(lo, lo));
-- if (it != end() && it->lo <= lo && hi <= it->hi)
-- return false;
-- }
--
-- // Look for a range abutting lo on the left.
-- // If it exists, take it out and increase our range.
-- if (lo > 0) {
-- iterator it = ranges_.find(RuneRange(lo-1, lo-1));
-- if (it != end()) {
-- lo = it->lo;
-- if (it->hi > hi)
-- hi = it->hi;
-- nrunes_ -= it->hi - it->lo + 1;
-- ranges_.erase(it);
-- }
-- }
--
-- // Look for a range abutting hi on the right.
-- // If it exists, take it out and increase our range.
-- if (hi < Runemax) {
-- iterator it = ranges_.find(RuneRange(hi+1, hi+1));
-- if (it != end()) {
-- hi = it->hi;
-- nrunes_ -= it->hi - it->lo + 1;
-- ranges_.erase(it);
-- }
-- }
--
-- // Look for ranges between lo and hi. Take them out.
-- // This is only safe because the set has no overlapping ranges.
-- // We've already removed any ranges abutting lo and hi, so
-- // any that overlap [lo, hi] must be contained within it.
-- for (;;) {
-- iterator it = ranges_.find(RuneRange(lo, hi));
-- if (it == end())
-- break;
-- nrunes_ -= it->hi - it->lo + 1;
-- ranges_.erase(it);
-- }
--
-- // Finally, add [lo, hi].
-- nrunes_ += hi - lo + 1;
-- ranges_.insert(RuneRange(lo, hi));
-- return true;
--}
--
--void CharClassBuilder::AddCharClass(CharClassBuilder *cc) {
-- for (iterator it = cc->begin(); it != cc->end(); ++it)
-- AddRange(it->lo, it->hi);
--}
--
--bool CharClassBuilder::Contains(Rune r) {
-- return ranges_.find(RuneRange(r, r)) != end();
--}
--
--// Does the character class behave the same on A-Z as on a-z?
--bool CharClassBuilder::FoldsASCII() {
-- return ((upper_ ^ lower_) & AlphaMask) == 0;
--}
--
--CharClassBuilder* CharClassBuilder::Copy() {
-- CharClassBuilder* cc = new CharClassBuilder;
-- for (iterator it = begin(); it != end(); ++it)
-- cc->ranges_.insert(RuneRange(it->lo, it->hi));
-- cc->upper_ = upper_;
-- cc->lower_ = lower_;
-- cc->nrunes_ = nrunes_;
-- return cc;
--}
--
--
--
--void CharClassBuilder::RemoveAbove(Rune r) {
-- if (r >= Runemax)
-- return;
--
-- if (r < 'z') {
-- if (r < 'a')
-- lower_ = 0;
-- else
-- lower_ &= AlphaMask >> ('z' - r);
-- }
--
-- if (r < 'Z') {
-- if (r < 'A')
-- upper_ = 0;
-- else
-- upper_ &= AlphaMask >> ('Z' - r);
-- }
--
-- for (;;) {
--
-- iterator it = ranges_.find(RuneRange(r + 1, Runemax));
-- if (it == end())
-- break;
-- RuneRange rr = *it;
-- ranges_.erase(it);
-- nrunes_ -= rr.hi - rr.lo + 1;
-- if (rr.lo <= r) {
-- rr.hi = r;
-- ranges_.insert(rr);
-- nrunes_ += rr.hi - rr.lo + 1;
-- }
-- }
--}
--
--void CharClassBuilder::Negate() {
-- // Build up negation and then copy in.
-- // Could edit ranges in place, but C++ won't let me.
-- vector<RuneRange> v;
-- v.reserve(ranges_.size() + 1);
--
-- // In negation, first range begins at 0, unless
-- // the current class begins at 0.
-- iterator it = begin();
-- if (it == end()) {
-- v.push_back(RuneRange(0, Runemax));
-- } else {
-- int nextlo = 0;
-- if (it->lo == 0) {
-- nextlo = it->hi + 1;
-- ++it;
-- }
-- for (; it != end(); ++it) {
-- v.push_back(RuneRange(nextlo, it->lo - 1));
-- nextlo = it->hi + 1;
-- }
-- if (nextlo <= Runemax)
-- v.push_back(RuneRange(nextlo, Runemax));
-- }
--
-- ranges_.clear();
-- for (int i = 0; i < v.size(); i++)
-- ranges_.insert(v[i]);
--
-- upper_ = AlphaMask & ~upper_;
-- lower_ = AlphaMask & ~lower_;
-- nrunes_ = Runemax+1 - nrunes_;
--}
--
--// Character class is a sorted list of ranges.
--// The ranges are allocated in the same block as the header,
--// necessitating a special allocator and Delete method.
--
--CharClass* CharClass::New(int maxranges) {
-- CharClass* cc;
-- uint8* data = new uint8[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
-- cc = reinterpret_cast<CharClass*>(data);
-- cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc);
-- cc->nranges_ = 0;
-- cc->folds_ascii_ = false;
-- cc->nrunes_ = 0;
-- return cc;
--}
--
--void CharClass::Delete() {
-- if (this == NULL)
-- return;
-- uint8 *data = reinterpret_cast<uint8*>(this);
-- delete[] data;
--}
--
--CharClass* CharClass::Negate() {
-- CharClass* cc = CharClass::New(nranges_+1);
-- cc->folds_ascii_ = folds_ascii_;
-- cc->nrunes_ = Runemax + 1 - nrunes_;
-- int n = 0;
-- int nextlo = 0;
-- for (CharClass::iterator it = begin(); it != end(); ++it) {
-- if (it->lo == nextlo) {
-- nextlo = it->hi + 1;
-- } else {
-- cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1);
-- nextlo = it->hi + 1;
-- }
-- }
-- if (nextlo <= Runemax)
-- cc->ranges_[n++] = RuneRange(nextlo, Runemax);
-- cc->nranges_ = n;
-- return cc;
--}
--
--bool CharClass::Contains(Rune r) {
-- RuneRange* rr = ranges_;
-- int n = nranges_;
-- while (n > 0) {
-- int m = n/2;
-- if (rr[m].hi < r) {
-- rr += m+1;
-- n -= m+1;
-- } else if (r < rr[m].lo) {
-- n = m;
-- } else { // rr[m].lo <= r && r <= rr[m].hi
-- return true;
-- }
-- }
-- return false;
--}
--
--CharClass* CharClassBuilder::GetCharClass() {
-- CharClass* cc = CharClass::New(ranges_.size());
-- int n = 0;
-- for (iterator it = begin(); it != end(); ++it)
-- cc->ranges_[n++] = *it;
-- cc->nranges_ = n;
-- DCHECK_LE(n, ranges_.size());
-- cc->nrunes_ = nrunes_;
-- cc->folds_ascii_ = FoldsASCII();
-- return cc;
--}
--
--} // namespace re2
-diff --git a/re2/re2/regexp.h b/re2/re2/regexp.h
-deleted file mode 100644
-index 1aebc16..0000000
---- a/re2/re2/regexp.h
-+++ /dev/null
-@@ -1,632 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// --- SPONSORED LINK --------------------------------------------------
--// If you want to use this library for regular expression matching,
--// you should use re2/re2.h, which provides a class RE2 that
--// mimics the PCRE interface provided by PCRE's C++ wrappers.
--// This header describes the low-level interface used to implement RE2
--// and may change in backwards-incompatible ways from time to time.
--// In contrast, RE2's interface will not.
--// ---------------------------------------------------------------------
--
--// Regular expression library: parsing, execution, and manipulation
--// of regular expressions.
--//
--// Any operation that traverses the Regexp structures should be written
--// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
--// regular expressions such as x++++++++++++++++++++... might cause recursive
--// traversals to overflow the stack.
--//
--// It is the caller's responsibility to provide appropriate mutual exclusion
--// around manipulation of the regexps. RE2 does this.
--//
--// PARSING
--//
--// Regexp::Parse parses regular expressions encoded in UTF-8.
--// The default syntax is POSIX extended regular expressions,
--// with the following changes:
--//
--// 1. Backreferences (optional in POSIX EREs) are not supported.
--// (Supporting them precludes the use of DFA-based
--// matching engines.)
--//
--// 2. Collating elements and collation classes are not supported.
--// (No one has needed or wanted them.)
--//
--// The exact syntax accepted can be modified by passing flags to
--// Regexp::Parse. In particular, many of the basic Perl additions
--// are available. The flags are documented below (search for LikePerl).
--//
--// If parsed with the flag Regexp::Latin1, both the regular expression
--// and the input to the matching routines are assumed to be encoded in
--// Latin-1, not UTF-8.
--//
--// EXECUTION
--//
--// Once Regexp has parsed a regular expression, it provides methods
--// to search text using that regular expression. These methods are
--// implemented via calling out to other regular expression libraries.
--// (Let's call them the sublibraries.)
--//
--// To call a sublibrary, Regexp does not simply prepare a
--// string version of the regular expression and hand it to the
--// sublibrary. Instead, Regexp prepares, from its own parsed form, the
--// corresponding internal representation used by the sublibrary.
--// This has the drawback of needing to know the internal representation
--// used by the sublibrary, but it has two important benefits:
--//
--// 1. The syntax and meaning of regular expressions is guaranteed
--// to be that used by Regexp's parser, not the syntax expected
--// by the sublibrary. Regexp might accept a restricted or
--// expanded syntax for regular expressions as compared with
--// the sublibrary. As long as Regexp can translate from its
--// internal form into the sublibrary's, clients need not know
--// exactly which sublibrary they are using.
--//
--// 2. The sublibrary parsers are bypassed. For whatever reason,
--// sublibrary regular expression parsers often have security
--// problems. For example, plan9grep's regular expression parser
--// has a buffer overflow in its handling of large character
--// classes, and PCRE's parser has had buffer overflow problems
--// in the past. Security-team requires sandboxing of sublibrary
--// regular expression parsers. Avoiding the sublibrary parsers
--// avoids the sandbox.
--//
--// The execution methods we use now are provided by the compiled form,
--// Prog, described in prog.h
--//
--// MANIPULATION
--//
--// Unlike other regular expression libraries, Regexp makes its parsed
--// form accessible to clients, so that client code can analyze the
--// parsed regular expressions.
--
--#ifndef RE2_REGEXP_H__
--#define RE2_REGEXP_H__
--
--#include "util/util.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
--enum RegexpOp {
-- // Matches no strings.
-- kRegexpNoMatch = 1,
--
-- // Matches empty string.
-- kRegexpEmptyMatch,
--
-- // Matches rune_.
-- kRegexpLiteral,
--
-- // Matches runes_.
-- kRegexpLiteralString,
--
-- // Matches concatenation of sub_[0..nsub-1].
-- kRegexpConcat,
-- // Matches union of sub_[0..nsub-1].
-- kRegexpAlternate,
--
-- // Matches sub_[0] zero or more times.
-- kRegexpStar,
-- // Matches sub_[0] one or more times.
-- kRegexpPlus,
-- // Matches sub_[0] zero or one times.
-- kRegexpQuest,
--
-- // Matches sub_[0] at least min_ times, at most max_ times.
-- // max_ == -1 means no upper limit.
-- kRegexpRepeat,
--
-- // Parenthesized (capturing) subexpression. Index is cap_.
-- // Optionally, capturing name is name_.
-- kRegexpCapture,
--
-- // Matches any character.
-- kRegexpAnyChar,
--
-- // Matches any byte [sic].
-- kRegexpAnyByte,
--
-- // Matches empty string at beginning of line.
-- kRegexpBeginLine,
-- // Matches empty string at end of line.
-- kRegexpEndLine,
--
-- // Matches word boundary "\b".
-- kRegexpWordBoundary,
-- // Matches not-a-word boundary "\B".
-- kRegexpNoWordBoundary,
--
-- // Matches empty string at beginning of text.
-- kRegexpBeginText,
-- // Matches empty string at end of text.
-- kRegexpEndText,
--
-- // Matches character class given by cc_.
-- kRegexpCharClass,
--
-- // Forces match of entire expression right now,
-- // with match ID match_id_ (used by RE2::Set).
-- kRegexpHaveMatch,
--
-- kMaxRegexpOp = kRegexpHaveMatch,
--};
--
--// Keep in sync with string list in regexp.cc
--enum RegexpStatusCode {
-- // No error
-- kRegexpSuccess = 0,
--
-- // Unexpected error
-- kRegexpInternalError,
--
-- // Parse errors
-- kRegexpBadEscape, // bad escape sequence
-- kRegexpBadCharClass, // bad character class
-- kRegexpBadCharRange, // bad character class range
-- kRegexpMissingBracket, // missing closing ]
-- kRegexpMissingParen, // missing closing )
-- kRegexpTrailingBackslash, // at end of regexp
-- kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
-- kRegexpRepeatSize, // bad repetition argument
-- kRegexpRepeatOp, // bad repetition operator
-- kRegexpBadPerlOp, // bad perl operator
-- kRegexpBadUTF8, // invalid UTF-8 in regexp
-- kRegexpBadNamedCapture, // bad named capture
--};
--
--// Error status for certain operations.
--class RegexpStatus {
-- public:
-- RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
-- ~RegexpStatus() { delete tmp_; }
--
-- void set_code(enum RegexpStatusCode code) { code_ = code; }
-- void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
-- void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
-- enum RegexpStatusCode code() const { return code_; }
-- const StringPiece& error_arg() const { return error_arg_; }
-- bool ok() const { return code() == kRegexpSuccess; }
--
-- // Copies state from status.
-- void Copy(const RegexpStatus& status);
--
-- // Returns text equivalent of code, e.g.:
-- // "Bad character class"
-- static const string& CodeText(enum RegexpStatusCode code);
--
-- // Returns text describing error, e.g.:
-- // "Bad character class: [z-a]"
-- string Text() const;
--
-- private:
-- enum RegexpStatusCode code_; // Kind of error
-- StringPiece error_arg_; // Piece of regexp containing syntax error.
-- string* tmp_; // Temporary storage, possibly where error_arg_ is.
--
-- DISALLOW_EVIL_CONSTRUCTORS(RegexpStatus);
--};
--
--// Walker to implement Simplify.
--class SimplifyWalker;
--
--// Compiled form; see prog.h
--class Prog;
--
--struct RuneRange {
-- RuneRange() : lo(0), hi(0) { }
-- RuneRange(int l, int h) : lo(l), hi(h) { }
-- Rune lo;
-- Rune hi;
--};
--
--// Less-than on RuneRanges treats a == b if they overlap at all.
--// This lets us look in a set to find the range covering a particular Rune.
--struct RuneRangeLess {
-- bool operator()(const RuneRange& a, const RuneRange& b) const {
-- return a.hi < b.lo;
-- }
--};
--
--class CharClassBuilder;
--
--class CharClass {
-- public:
-- void Delete();
--
-- typedef RuneRange* iterator;
-- iterator begin() { return ranges_; }
-- iterator end() { return ranges_ + nranges_; }
--
-- int size() { return nrunes_; }
-- bool empty() { return nrunes_ == 0; }
-- bool full() { return nrunes_ == Runemax+1; }
-- bool FoldsASCII() { return folds_ascii_; }
--
-- bool Contains(Rune r);
-- CharClass* Negate();
--
-- private:
-- CharClass(); // not implemented
-- ~CharClass(); // not implemented
-- static CharClass* New(int maxranges);
--
-- friend class CharClassBuilder;
--
-- bool folds_ascii_;
-- int nrunes_;
-- RuneRange *ranges_;
-- int nranges_;
-- DISALLOW_EVIL_CONSTRUCTORS(CharClass);
--};
--
--class Regexp {
-- public:
--
-- // Flags for parsing. Can be ORed together.
-- enum ParseFlags {
-- NoParseFlags = 0,
-- FoldCase = 1<<0, // Fold case during matching (case-insensitive).
-- Literal = 1<<1, // Treat s as literal string instead of a regexp.
-- ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s
-- // and [[:space:]] to match newline.
-- DotNL = 1<<3, // Allow . to match newline.
-- MatchNL = ClassNL | DotNL,
-- OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and
-- // end of text, not around embedded newlines.
-- // (Perl's default)
-- Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8.
-- NonGreedy = 1<<6, // Repetition operators are non-greedy by default.
-- PerlClasses = 1<<7, // Allow Perl character classes like \d.
-- PerlB = 1<<8, // Allow Perl's \b and \B.
-- PerlX = 1<<9, // Perl extensions:
-- // non-capturing parens - (?: )
-- // non-greedy operators - *? +? ?? {}?
-- // flag edits - (?i) (?-i) (?i: )
-- // i - FoldCase
-- // m - !OneLine
-- // s - DotNL
-- // U - NonGreedy
-- // line ends: \A \z
-- // \Q and \E to disable/enable metacharacters
-- // (?P<name>expr) for named captures
-- // \C to match any single byte
-- UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
-- // and \P{Han} for its negation.
-- NeverNL = 1<<11, // Never match NL, even if the regexp mentions
-- // it explicitly.
--
-- // As close to Perl as we can get.
-- LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
-- UnicodeGroups,
--
-- // Internal use only.
-- WasDollar = 1<<15, // on kRegexpEndText: was $ in regexp text
-- };
--
-- // Get. No set, Regexps are logically immutable once created.
-- RegexpOp op() { return static_cast<RegexpOp>(op_); }
-- int nsub() { return nsub_; }
-- bool simple() { return simple_; }
-- enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
-- int Ref(); // For testing.
--
-- Regexp** sub() {
-- if(nsub_ <= 1)
-- return &subone_;
-- else
-- return submany_;
-- }
--
-- int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
-- int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
-- Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
-- CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
-- int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
-- const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
-- Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
-- int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
-- int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
--
-- // Increments reference count, returns object as convenience.
-- Regexp* Incref();
--
-- // Decrements reference count and deletes this object if count reaches 0.
-- void Decref();
--
-- // Parses string s to produce regular expression, returned.
-- // Caller must release return value with re->Decref().
-- // On failure, sets *status (if status != NULL) and returns NULL.
-- static Regexp* Parse(const StringPiece& s, ParseFlags flags,
-- RegexpStatus* status);
--
-- // Returns a _new_ simplified version of the current regexp.
-- // Does not edit the current regexp.
-- // Caller must release return value with re->Decref().
-- // Simplified means that counted repetition has been rewritten
-- // into simpler terms and all Perl/POSIX features have been
-- // removed. The result will capture exactly the same
-- // subexpressions the original did, unless formatted with ToString.
-- Regexp* Simplify();
-- friend class SimplifyWalker;
--
-- // Parses the regexp src and then simplifies it and sets *dst to the
-- // string representation of the simplified form. Returns true on success.
-- // Returns false and sets *status (if status != NULL) on parse error.
-- static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
-- string* dst,
-- RegexpStatus* status);
--
-- // Returns the number of capturing groups in the regexp.
-- int NumCaptures();
-- friend class NumCapturesWalker;
--
-- // Returns a map from names to capturing group indices,
-- // or NULL if the regexp contains no named capture groups.
-- // The caller is responsible for deleting the map.
-- map<string, int>* NamedCaptures();
--
-- // Returns a map from capturing group indices to capturing group
-- // names or NULL if the regexp contains no named capture groups. The
-- // caller is responsible for deleting the map.
-- map<int, string>* CaptureNames();
--
-- // Returns a string representation of the current regexp,
-- // using as few parentheses as possible.
-- string ToString();
--
-- // Convenience functions. They consume the passed reference,
-- // so in many cases you should use, e.g., Plus(re->Incref(), flags).
-- // They do not consume allocated arrays like subs or runes.
-- static Regexp* Plus(Regexp* sub, ParseFlags flags);
-- static Regexp* Star(Regexp* sub, ParseFlags flags);
-- static Regexp* Quest(Regexp* sub, ParseFlags flags);
-- static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
-- static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
-- static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
-- static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
-- static Regexp* NewLiteral(Rune rune, ParseFlags flags);
-- static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
-- static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
-- static Regexp* HaveMatch(int match_id, ParseFlags flags);
--
-- // Like Alternate but does not factor out common prefixes.
-- static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
--
-- // Debugging function. Returns string format for regexp
-- // that makes structure clear. Does NOT use regexp syntax.
-- string Dump();
--
-- // Helper traversal class, defined fully in walker-inl.h.
-- template<typename T> class Walker;
--
-- // Compile to Prog. See prog.h
-- // Reverse prog expects to be run over text backward.
-- // Construction and execution of prog will
-- // stay within approximately max_mem bytes of memory.
-- // If max_mem <= 0, a reasonable default is used.
-- Prog* CompileToProg(int64 max_mem);
-- Prog* CompileToReverseProg(int64 max_mem);
--
-- // Whether to expect this library to find exactly the same answer as PCRE
-- // when running this regexp. Most regexps do mimic PCRE exactly, but a few
-- // obscure cases behave differently. Technically this is more a property
-- // of the Prog than the Regexp, but the computation is much easier to do
-- // on the Regexp. See mimics_pcre.cc for the exact conditions.
-- bool MimicsPCRE();
--
-- // Benchmarking function.
-- void NullWalk();
--
-- // Whether every match of this regexp must be anchored and
-- // begin with a non-empty fixed string (perhaps after ASCII
-- // case-folding). If so, returns the prefix and the sub-regexp that
-- // follows it.
-- bool RequiredPrefix(string* prefix, bool *foldcase, Regexp** suffix);
--
-- private:
-- // Constructor allocates vectors as appropriate for operator.
-- explicit Regexp(RegexpOp op, ParseFlags parse_flags);
--
-- // Use Decref() instead of delete to release Regexps.
-- // This is private to catch deletes at compile time.
-- ~Regexp();
-- void Destroy();
-- bool QuickDestroy();
--
-- // Helpers for Parse. Listed here so they can edit Regexps.
-- class ParseState;
-- friend class ParseState;
-- friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
-- RegexpStatus* status);
--
-- // Helper for testing [sic].
-- friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);
--
-- // Computes whether Regexp is already simple.
-- bool ComputeSimple();
--
-- // Constructor that generates a concatenation or alternation,
-- // enforcing the limit on the number of subexpressions for
-- // a particular Regexp.
-- static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
-- ParseFlags flags, bool can_factor);
--
-- // Returns the leading string that re starts with.
-- // The returned Rune* points into a piece of re,
-- // so it must not be used after the caller calls re->Decref().
-- static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
--
-- // Removes the first n leading runes from the beginning of re.
-- // Edits re in place.
-- static void RemoveLeadingString(Regexp* re, int n);
--
-- // Returns the leading regexp in re's top-level concatenation.
-- // The returned Regexp* points at re or a sub-expression of re,
-- // so it must not be used after the caller calls re->Decref().
-- static Regexp* LeadingRegexp(Regexp* re);
--
-- // Removes LeadingRegexp(re) from re and returns the remainder.
-- // Might edit re in place.
-- static Regexp* RemoveLeadingRegexp(Regexp* re);
--
-- // Simplifies an alternation of literal strings by factoring out
-- // common prefixes.
-- static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
-- static int FactorAlternationRecursive(Regexp** sub, int nsub,
-- ParseFlags flags, int maxdepth);
--
-- // Is a == b? Only efficient on regexps that have not been through
-- // Simplify yet - the expansion of a kRegexpRepeat will make this
-- // take a long time. Do not call on such regexps, hence private.
-- static bool Equal(Regexp* a, Regexp* b);
--
-- // Allocate space for n sub-regexps.
-- void AllocSub(int n) {
-- if (n < 0 || static_cast<uint16>(n) != n)
-- LOG(FATAL) << "Cannot AllocSub " << n;
-- if (n > 1)
-- submany_ = new Regexp*[n];
-- nsub_ = n;
-- }
--
-- // Add Rune to LiteralString
-- void AddRuneToString(Rune r);
--
-- // Swaps this with that, in place.
-- void Swap(Regexp *that);
--
-- // Operator. See description of operators above.
-- // uint8 instead of RegexpOp to control space usage.
-- uint8 op_;
--
-- // Is this regexp structure already simple
-- // (has it been returned by Simplify)?
-- // uint8 instead of bool to control space usage.
-- uint8 simple_;
--
-- // Flags saved from parsing and used during execution.
-- // (Only FoldCase is used.)
-- // uint16 instead of ParseFlags to control space usage.
-- uint16 parse_flags_;
--
-- // Reference count. Exists so that SimplifyRegexp can build
-- // regexp structures that are dags rather than trees to avoid
-- // exponential blowup in space requirements.
-- // uint16 to control space usage.
-- // The standard regexp routines will never generate a
-- // ref greater than the maximum repeat count (100),
-- // but even so, Incref and Decref consult an overflow map
-- // when ref_ reaches kMaxRef.
-- uint16 ref_;
-- static const uint16 kMaxRef = 0xffff;
--
-- // Subexpressions.
-- // uint16 to control space usage.
-- // Concat and Alternate handle larger numbers of subexpressions
-- // by building concatenation or alternation trees.
-- // Other routines should call Concat or Alternate instead of
-- // filling in sub() by hand.
-- uint16 nsub_;
-- static const uint16 kMaxNsub = 0xffff;
-- union {
-- Regexp** submany_; // if nsub_ > 1
-- Regexp* subone_; // if nsub_ == 1
-- };
--
-- // Extra space for parse and teardown stacks.
-- Regexp* down_;
--
-- // Arguments to operator. See description of operators above.
-- union {
-- struct { // Repeat
-- int max_;
-- int min_;
-- };
-- struct { // Capture
-- int cap_;
-- string* name_;
-- };
-- struct { // LiteralString
-- int nrunes_;
-- Rune* runes_;
-- };
-- struct { // CharClass
-- // These two could be in separate union members,
-- // but it wouldn't save any space (there are other two-word structs)
-- // and keeping them separate avoids confusion during parsing.
-- CharClass* cc_;
-- CharClassBuilder* ccb_;
-- };
-- Rune rune_; // Literal
-- int match_id_; // HaveMatch
-- void *the_union_[2]; // as big as any other element, for memset
-- };
--
-- DISALLOW_EVIL_CONSTRUCTORS(Regexp);
--};
--
--// Character class set: contains non-overlapping, non-abutting RuneRanges.
--typedef set<RuneRange, RuneRangeLess> RuneRangeSet;
--
--class CharClassBuilder {
-- public:
-- CharClassBuilder();
--
-- typedef RuneRangeSet::iterator iterator;
-- iterator begin() { return ranges_.begin(); }
-- iterator end() { return ranges_.end(); }
--
-- int size() { return nrunes_; }
-- bool empty() { return nrunes_ == 0; }
-- bool full() { return nrunes_ == Runemax+1; }
--
-- bool Contains(Rune r);
-- bool FoldsASCII();
-- bool AddRange(Rune lo, Rune hi); // returns whether class changed
-- CharClassBuilder* Copy();
-- void AddCharClass(CharClassBuilder* cc);
-- void Negate();
-- void RemoveAbove(Rune r);
-- CharClass* GetCharClass();
-- void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
--
-- private:
-- static const uint32 AlphaMask = (1<<26) - 1;
-- uint32 upper_; // bitmap of A-Z
-- uint32 lower_; // bitmap of a-z
-- int nrunes_;
-- RuneRangeSet ranges_;
-- DISALLOW_EVIL_CONSTRUCTORS(CharClassBuilder);
--};
--
--// Tell g++ that bitwise ops on ParseFlags produce ParseFlags.
--inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b)
--{
-- return static_cast<Regexp::ParseFlags>(static_cast<int>(a) | static_cast<int>(b));
--}
--
--inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b)
--{
-- return static_cast<Regexp::ParseFlags>(static_cast<int>(a) ^ static_cast<int>(b));
--}
--
--inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b)
--{
-- return static_cast<Regexp::ParseFlags>(static_cast<int>(a) & static_cast<int>(b));
--}
--
--inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
--{
-- return static_cast<Regexp::ParseFlags>(~static_cast<int>(a));
--}
--
--
--
--} // namespace re2
--
--#endif // RE2_REGEXP_H__
-diff --git a/re2/re2/set.cc b/re2/re2/set.cc
-deleted file mode 100644
-index 2bcd30a..0000000
---- a/re2/re2/set.cc
-+++ /dev/null
-@@ -1,113 +0,0 @@
--// Copyright 2010 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "re2/set.h"
--
--#include "util/util.h"
--#include "re2/stringpiece.h"
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--
--using namespace re2;
--
--RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
-- options_.Copy(options);
-- anchor_ = anchor;
-- prog_ = NULL;
-- compiled_ = false;
--}
--
--RE2::Set::~Set() {
-- for (int i = 0; i < re_.size(); i++)
-- re_[i]->Decref();
-- delete prog_;
--}
--
--int RE2::Set::Add(const StringPiece& pattern, string* error) {
-- if (compiled_) {
-- LOG(DFATAL) << "RE2::Set::Add after Compile";
-- return -1;
-- }
--
-- Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
-- options_.ParseFlags());
--
-- RegexpStatus status;
-- re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
-- if (re == NULL) {
-- if (error != NULL)
-- *error = status.Text();
-- if (options_.log_errors())
-- LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
-- return -1;
-- }
--
-- // Concatenate with match index and push on vector.
-- int n = re_.size();
-- re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
-- if (re->op() == kRegexpConcat) {
-- int nsub = re->nsub();
-- re2::Regexp** sub = new re2::Regexp*[nsub + 1];
-- for (int i = 0; i < nsub; i++)
-- sub[i] = re->sub()[i]->Incref();
-- sub[nsub] = m;
-- re->Decref();
-- re = re2::Regexp::Concat(sub, nsub + 1, pf);
-- delete[] sub;
-- } else {
-- re2::Regexp* sub[2];
-- sub[0] = re;
-- sub[1] = m;
-- re = re2::Regexp::Concat(sub, 2, pf);
-- }
-- re_.push_back(re);
-- return n;
--}
--
--bool RE2::Set::Compile() {
-- if (compiled_) {
-- LOG(DFATAL) << "RE2::Set::Compile multiple times";
-- return false;
-- }
-- compiled_ = true;
--
-- Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
-- options_.ParseFlags());
-- re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
-- re_.size(), pf);
-- re_.clear();
-- re2::Regexp* sre = re->Simplify();
-- re->Decref();
-- re = sre;
-- if (re == NULL) {
-- if (options_.log_errors())
-- LOG(ERROR) << "Error simplifying during Compile.";
-- return false;
-- }
--
-- prog_ = Prog::CompileSet(options_, anchor_, re);
-- return prog_ != NULL;
--}
--
--bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
-- if (!compiled_) {
-- LOG(DFATAL) << "RE2::Set::Match without Compile";
-- return false;
-- }
-- v->clear();
-- bool failed;
-- bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
-- Prog::kManyMatch, NULL, &failed, v);
-- if (failed)
-- LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
--
-- if (ret == false)
-- return false;
-- if (v->size() == 0) {
-- LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
-- return false;
-- }
-- return true;
--}
-diff --git a/re2/re2/set.h b/re2/re2/set.h
-deleted file mode 100644
-index d716425..0000000
---- a/re2/re2/set.h
-+++ /dev/null
-@@ -1,55 +0,0 @@
--// Copyright 2010 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_SET_H
--#define RE2_SET_H
--
--#include <utility>
--#include <vector>
--
--#include "re2/re2.h"
--
--namespace re2 {
--using std::vector;
--
--// An RE2::Set represents a collection of regexps that can
--// be searched for simultaneously.
--class RE2::Set {
-- public:
-- Set(const RE2::Options& options, RE2::Anchor anchor);
-- ~Set();
--
-- // Add adds regexp pattern to the set, interpreted using the RE2 options.
-- // (The RE2 constructor's default options parameter is RE2::UTF8.)
-- // Add returns the regexp index that will be used to identify
-- // it in the result of Match, or -1 if the regexp cannot be parsed.
-- // Indices are assigned in sequential order starting from 0.
-- // Error returns do not increment the index.
-- // If an error occurs and error != NULL, *error will hold an error message.
-- int Add(const StringPiece& pattern, string* error);
--
-- // Compile prepares the Set for matching.
-- // Add must not be called again after Compile.
-- // Compile must be called before FullMatch or PartialMatch.
-- // Compile may return false if it runs out of memory.
-- bool Compile();
--
-- // Match returns true if text matches any of the regexps in the set.
-- // If so, it fills v with the indices of the matching regexps.
-- bool Match(const StringPiece& text, vector<int>* v) const;
--
-- private:
-- RE2::Options options_;
-- RE2::Anchor anchor_;
-- vector<re2::Regexp*> re_;
-- re2::Prog* prog_;
-- bool compiled_;
-- //DISALLOW_EVIL_CONSTRUCTORS(Set);
-- Set(const Set&);
-- void operator=(const Set&);
--};
--
--} // namespace re2
--
--#endif // RE2_SET_H
-diff --git a/re2/re2/simplify.cc b/re2/re2/simplify.cc
-deleted file mode 100644
-index faf3208..0000000
---- a/re2/re2/simplify.cc
-+++ /dev/null
-@@ -1,393 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Rewrite POSIX and other features in re
--// to use simple extended regular expression features.
--// Also sort and simplify character classes.
--
--#include "util/util.h"
--#include "re2/regexp.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--// Parses the regexp src and then simplifies it and sets *dst to the
--// string representation of the simplified form. Returns true on success.
--// Returns false and sets *error (if error != NULL) on error.
--bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
-- string* dst,
-- RegexpStatus* status) {
-- Regexp* re = Parse(src, flags, status);
-- if (re == NULL)
-- return false;
-- Regexp* sre = re->Simplify();
-- re->Decref();
-- if (sre == NULL) {
-- // Should not happen, since Simplify never fails.
-- LOG(ERROR) << "Simplify failed on " << src;
-- if (status) {
-- status->set_code(kRegexpInternalError);
-- status->set_error_arg(src);
-- }
-- return false;
-- }
-- *dst = sre->ToString();
-- sre->Decref();
-- return true;
--}
--
--// Assuming the simple_ flags on the children are accurate,
--// is this Regexp* simple?
--bool Regexp::ComputeSimple() {
-- Regexp** subs;
-- switch (op_) {
-- case kRegexpNoMatch:
-- case kRegexpEmptyMatch:
-- case kRegexpLiteral:
-- case kRegexpLiteralString:
-- case kRegexpBeginLine:
-- case kRegexpEndLine:
-- case kRegexpBeginText:
-- case kRegexpWordBoundary:
-- case kRegexpNoWordBoundary:
-- case kRegexpEndText:
-- case kRegexpAnyChar:
-- case kRegexpAnyByte:
-- case kRegexpHaveMatch:
-- return true;
-- case kRegexpConcat:
-- case kRegexpAlternate:
-- // These are simple as long as the subpieces are simple.
-- subs = sub();
-- for (int i = 0; i < nsub_; i++)
-- if (!subs[i]->simple_)
-- return false;
-- return true;
-- case kRegexpCharClass:
-- // Simple as long as the char class is not empty, not full.
-- if (ccb_ != NULL)
-- return !ccb_->empty() && !ccb_->full();
-- return !cc_->empty() && !cc_->full();
-- case kRegexpCapture:
-- subs = sub();
-- return subs[0]->simple_;
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- subs = sub();
-- if (!subs[0]->simple_)
-- return false;
-- switch (subs[0]->op_) {
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- case kRegexpEmptyMatch:
-- case kRegexpNoMatch:
-- return false;
-- default:
-- break;
-- }
-- return true;
-- case kRegexpRepeat:
-- return false;
-- }
-- LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
-- return false;
--}
--
--// Walker subclass used by Simplify.
--// The simplify walk is purely post-recursive: given the simplified children,
--// PostVisit creates the simplified result.
--// The child_args are simplified Regexp*s.
--class SimplifyWalker : public Regexp::Walker<Regexp*> {
-- public:
-- SimplifyWalker() {}
-- virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
-- virtual Regexp* PostVisit(Regexp* re,
-- Regexp* parent_arg,
-- Regexp* pre_arg,
-- Regexp** child_args, int nchild_args);
-- virtual Regexp* Copy(Regexp* re);
-- virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
--
-- private:
-- // These functions are declared inside SimplifyWalker so that
-- // they can edit the private fields of the Regexps they construct.
--
-- // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
-- // Caller must Decref return value when done with it.
-- static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
--
-- // Simplifies the expression re{min,max} in terms of *, +, and ?.
-- // Returns a new regexp. Does not edit re. Does not consume reference to re.
-- // Caller must Decref return value when done with it.
-- static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
-- Regexp::ParseFlags parse_flags);
--
-- // Simplifies a character class by expanding any named classes
-- // into rune ranges. Does not edit re. Does not consume ref to re.
-- // Caller must Decref return value when done with it.
-- static Regexp* SimplifyCharClass(Regexp* re);
--
-- DISALLOW_EVIL_CONSTRUCTORS(SimplifyWalker);
--};
--
--// Simplifies a regular expression, returning a new regexp.
--// The new regexp uses traditional Unix egrep features only,
--// plus the Perl (?:) non-capturing parentheses.
--// Otherwise, no POSIX or Perl additions. The new regexp
--// captures exactly the same subexpressions (with the same indices)
--// as the original.
--// Does not edit current object.
--// Caller must Decref() return value when done with it.
--
--Regexp* Regexp::Simplify() {
-- if (simple_)
-- return Incref();
-- SimplifyWalker w;
-- return w.Walk(this, NULL);
--}
--
--#define Simplify DontCallSimplify // Avoid accidental recursion
--
--Regexp* SimplifyWalker::Copy(Regexp* re) {
-- return re->Incref();
--}
--
--Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
-- // This should never be called, since we use Walk and not
-- // WalkExponential.
-- LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
-- return re->Incref();
--}
--
--Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
-- if (re->simple_) {
-- *stop = true;
-- return re->Incref();
-- }
-- return NULL;
--}
--
--Regexp* SimplifyWalker::PostVisit(Regexp* re,
-- Regexp* parent_arg,
-- Regexp* pre_arg,
-- Regexp** child_args,
-- int nchild_args) {
-- switch (re->op()) {
-- case kRegexpNoMatch:
-- case kRegexpEmptyMatch:
-- case kRegexpLiteral:
-- case kRegexpLiteralString:
-- case kRegexpBeginLine:
-- case kRegexpEndLine:
-- case kRegexpBeginText:
-- case kRegexpWordBoundary:
-- case kRegexpNoWordBoundary:
-- case kRegexpEndText:
-- case kRegexpAnyChar:
-- case kRegexpAnyByte:
-- case kRegexpHaveMatch:
-- // All these are always simple.
-- re->simple_ = true;
-- return re->Incref();
--
-- case kRegexpConcat:
-- case kRegexpAlternate: {
-- // These are simple as long as the subpieces are simple.
-- // Two passes to avoid allocation in the common case.
-- bool changed = false;
-- Regexp** subs = re->sub();
-- for (int i = 0; i < re->nsub_; i++) {
-- Regexp* sub = subs[i];
-- Regexp* newsub = child_args[i];
-- if (newsub != sub) {
-- changed = true;
-- break;
-- }
-- }
-- if (!changed) {
-- for (int i = 0; i < re->nsub_; i++) {
-- Regexp* newsub = child_args[i];
-- newsub->Decref();
-- }
-- re->simple_ = true;
-- return re->Incref();
-- }
-- Regexp* nre = new Regexp(re->op(), re->parse_flags());
-- nre->AllocSub(re->nsub_);
-- Regexp** nre_subs = nre->sub();
-- for (int i = 0; i <re->nsub_; i++)
-- nre_subs[i] = child_args[i];
-- nre->simple_ = true;
-- return nre;
-- }
--
-- case kRegexpCapture: {
-- Regexp* newsub = child_args[0];
-- if (newsub == re->sub()[0]) {
-- newsub->Decref();
-- re->simple_ = true;
-- return re->Incref();
-- }
-- Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
-- nre->AllocSub(1);
-- nre->sub()[0] = newsub;
-- nre->cap_ = re->cap_;
-- nre->simple_ = true;
-- return nre;
-- }
--
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest: {
-- Regexp* newsub = child_args[0];
-- // Special case: repeat the empty string as much as
-- // you want, but it's still the empty string.
-- if (newsub->op() == kRegexpEmptyMatch)
-- return newsub;
--
-- // These are simple as long as the subpiece is simple.
-- if (newsub == re->sub()[0]) {
-- newsub->Decref();
-- re->simple_ = true;
-- return re->Incref();
-- }
--
-- // These are also idempotent if flags are constant.
-- if (re->op() == newsub->op() &&
-- re->parse_flags() == newsub->parse_flags())
-- return newsub;
--
-- Regexp* nre = new Regexp(re->op(), re->parse_flags());
-- nre->AllocSub(1);
-- nre->sub()[0] = newsub;
-- nre->simple_ = true;
-- return nre;
-- }
--
-- case kRegexpRepeat: {
-- Regexp* newsub = child_args[0];
-- // Special case: repeat the empty string as much as
-- // you want, but it's still the empty string.
-- if (newsub->op() == kRegexpEmptyMatch)
-- return newsub;
--
-- Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
-- re->parse_flags());
-- newsub->Decref();
-- nre->simple_ = true;
-- return nre;
-- }
--
-- case kRegexpCharClass: {
-- Regexp* nre = SimplifyCharClass(re);
-- nre->simple_ = true;
-- return nre;
-- }
-- }
--
-- LOG(ERROR) << "Simplify case not handled: " << re->op();
-- return re->Incref();
--}
--
--// Creates a concatenation of two Regexp, consuming refs to re1 and re2.
--// Returns a new Regexp, handing the ref to the caller.
--Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
-- Regexp::ParseFlags parse_flags) {
-- Regexp* re = new Regexp(kRegexpConcat, parse_flags);
-- re->AllocSub(2);
-- Regexp** subs = re->sub();
-- subs[0] = re1;
-- subs[1] = re2;
-- return re;
--}
--
--// Simplifies the expression re{min,max} in terms of *, +, and ?.
--// Returns a new regexp. Does not edit re. Does not consume reference to re.
--// Caller must Decref return value when done with it.
--// The result will *not* necessarily have the right capturing parens
--// if you call ToString() and re-parse it: (x){2} becomes (x)(x),
--// but in the Regexp* representation, both (x) are marked as $1.
--Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
-- Regexp::ParseFlags f) {
-- // x{n,} means at least n matches of x.
-- if (max == -1) {
-- // Special case: x{0,} is x*
-- if (min == 0)
-- return Regexp::Star(re->Incref(), f);
--
-- // Special case: x{1,} is x+
-- if (min == 1)
-- return Regexp::Plus(re->Incref(), f);
--
-- // General case: x{4,} is xxxx+
-- Regexp* nre = new Regexp(kRegexpConcat, f);
-- nre->AllocSub(min);
-- VLOG(1) << "Simplify " << min;
-- Regexp** nre_subs = nre->sub();
-- for (int i = 0; i < min-1; i++)
-- nre_subs[i] = re->Incref();
-- nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
-- return nre;
-- }
--
-- // Special case: (x){0} matches only empty string.
-- if (min == 0 && max == 0)
-- return new Regexp(kRegexpEmptyMatch, f);
--
-- // Special case: x{1} is just x.
-- if (min == 1 && max == 1)
-- return re->Incref();
--
-- // General case: x{n,m} means n copies of x and m copies of x?.
-- // The machine will do less work if we nest the final m copies,
-- // so that x{2,5} = xx(x(x(x)?)?)?
--
-- // Build leading prefix: xx. Capturing only on the last one.
-- Regexp* nre = NULL;
-- if (min > 0) {
-- nre = new Regexp(kRegexpConcat, f);
-- nre->AllocSub(min);
-- Regexp** nre_subs = nre->sub();
-- for (int i = 0; i < min; i++)
-- nre_subs[i] = re->Incref();
-- }
--
-- // Build and attach suffix: (x(x(x)?)?)?
-- if (max > min) {
-- Regexp* suf = Regexp::Quest(re->Incref(), f);
-- for (int i = min+1; i < max; i++)
-- suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
-- if (nre == NULL)
-- nre = suf;
-- else
-- nre = Concat2(nre, suf, f);
-- }
--
-- if (nre == NULL) {
-- // Some degenerate case, like min > max, or min < max < 0.
-- // This shouldn't happen, because the parser rejects such regexps.
-- LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
-- return new Regexp(kRegexpNoMatch, f);
-- }
--
-- return nre;
--}
--
--// Simplifies a character class.
--// Caller must Decref return value when done with it.
--Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
-- CharClass* cc = re->cc();
--
-- // Special cases
-- if (cc->empty())
-- return new Regexp(kRegexpNoMatch, re->parse_flags());
-- if (cc->full())
-- return new Regexp(kRegexpAnyChar, re->parse_flags());
--
-- return re->Incref();
--}
--
--} // namespace re2
-diff --git a/re2/re2/stringpiece.h b/re2/re2/stringpiece.h
-deleted file mode 100644
-index ab9297c..0000000
---- a/re2/re2/stringpiece.h
-+++ /dev/null
-@@ -1,182 +0,0 @@
--// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// A string-like object that points to a sized piece of memory.
--//
--// Functions or methods may use const StringPiece& parameters to accept either
--// a "const char*" or a "string" value that will be implicitly converted to
--// a StringPiece. The implicit conversion means that it is often appropriate
--// to include this .h file in other files rather than forward-declaring
--// StringPiece as would be appropriate for most other Google classes.
--//
--// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
--// conversions from "const char*" to "string" and back again.
--//
--//
--// Arghh! I wish C++ literals were "string".
--
--#ifndef STRINGS_STRINGPIECE_H__
--#define STRINGS_STRINGPIECE_H__
--
--#include <string.h>
--#include <cstddef>
--#include <iosfwd>
--#include <string>
--
--namespace re2 {
--
--class StringPiece {
-- private:
-- const char* ptr_;
-- int length_;
--
-- public:
-- // We provide non-explicit singleton constructors so users can pass
-- // in a "const char*" or a "string" wherever a "StringPiece" is
-- // expected.
-- StringPiece() : ptr_(NULL), length_(0) { }
-- StringPiece(const char* str)
-- : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
-- StringPiece(const std::string& str)
-- : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
-- StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }
--
-- // data() may return a pointer to a buffer with embedded NULs, and the
-- // returned buffer may or may not be null terminated. Therefore it is
-- // typically a mistake to pass data() to a routine that expects a NUL
-- // terminated string.
-- const char* data() const { return ptr_; }
-- int size() const { return length_; }
-- int length() const { return length_; }
-- bool empty() const { return length_ == 0; }
--
-- void clear() { ptr_ = NULL; length_ = 0; }
-- void set(const char* data, int len) { ptr_ = data; length_ = len; }
-- void set(const char* str) {
-- ptr_ = str;
-- if (str != NULL)
-- length_ = static_cast<int>(strlen(str));
-- else
-- length_ = 0;
-- }
-- void set(const void* data, int len) {
-- ptr_ = reinterpret_cast<const char*>(data);
-- length_ = len;
-- }
--
-- char operator[](int i) const { return ptr_[i]; }
--
-- void remove_prefix(int n) {
-- ptr_ += n;
-- length_ -= n;
-- }
--
-- void remove_suffix(int n) {
-- length_ -= n;
-- }
--
-- int compare(const StringPiece& x) const {
-- int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_));
-- if (r == 0) {
-- if (length_ < x.length_) r = -1;
-- else if (length_ > x.length_) r = +1;
-- }
-- return r;
-- }
--
-- std::string as_string() const {
-- return std::string(data(), size());
-- }
-- // We also define ToString() here, since many other string-like
-- // interfaces name the routine that converts to a C++ string
-- // "ToString", and it's confusing to have the method that does that
-- // for a StringPiece be called "as_string()". We also leave the
-- // "as_string()" method defined here for existing code.
-- std::string ToString() const {
-- return std::string(data(), size());
-- }
--
-- void CopyToString(std::string* target) const;
-- void AppendToString(std::string* target) const;
--
-- // Does "this" start with "x"
-- bool starts_with(const StringPiece& x) const {
-- return ((length_ >= x.length_) &&
-- (memcmp(ptr_, x.ptr_, x.length_) == 0));
-- }
--
-- // Does "this" end with "x"
-- bool ends_with(const StringPiece& x) const {
-- return ((length_ >= x.length_) &&
-- (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
-- }
--
-- // standard STL container boilerplate
-- typedef char value_type;
-- typedef const char* pointer;
-- typedef const char& reference;
-- typedef const char& const_reference;
-- typedef size_t size_type;
-- typedef ptrdiff_t difference_type;
-- static const size_type npos;
-- typedef const char* const_iterator;
-- typedef const char* iterator;
-- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
-- typedef std::reverse_iterator<iterator> reverse_iterator;
-- iterator begin() const { return ptr_; }
-- iterator end() const { return ptr_ + length_; }
-- const_reverse_iterator rbegin() const {
-- return const_reverse_iterator(ptr_ + length_);
-- }
-- const_reverse_iterator rend() const {
-- return const_reverse_iterator(ptr_);
-- }
-- // STLS says return size_type, but Google says return int
-- int max_size() const { return length_; }
-- int capacity() const { return length_; }
--
-- int copy(char* buf, size_type n, size_type pos = 0) const;
--
-- int find(const StringPiece& s, size_type pos = 0) const;
-- int find(char c, size_type pos = 0) const;
-- int rfind(const StringPiece& s, size_type pos = npos) const;
-- int rfind(char c, size_type pos = npos) const;
--
-- StringPiece substr(size_type pos, size_type n = npos) const;
--
-- static bool _equal(const StringPiece&, const StringPiece&);
--};
--
--inline bool operator==(const StringPiece& x, const StringPiece& y) {
-- return StringPiece::_equal(x, y);
--}
--
--inline bool operator!=(const StringPiece& x, const StringPiece& y) {
-- return !(x == y);
--}
--
--inline bool operator<(const StringPiece& x, const StringPiece& y) {
-- const int r = memcmp(x.data(), y.data(),
-- std::min(x.size(), y.size()));
-- return ((r < 0) || ((r == 0) && (x.size() < y.size())));
--}
--
--inline bool operator>(const StringPiece& x, const StringPiece& y) {
-- return y < x;
--}
--
--inline bool operator<=(const StringPiece& x, const StringPiece& y) {
-- return !(x > y);
--}
--
--inline bool operator>=(const StringPiece& x, const StringPiece& y) {
-- return !(x < y);
--}
--
--} // namespace re2
--
--// allow StringPiece to be logged
--extern std::ostream& operator<<(std::ostream& o, const re2::StringPiece& piece);
--
--#endif // STRINGS_STRINGPIECE_H__
-diff --git a/re2/re2/testing/backtrack.cc b/re2/re2/testing/backtrack.cc
-deleted file mode 100644
-index b2dd6db..0000000
---- a/re2/re2/testing/backtrack.cc
-+++ /dev/null
-@@ -1,254 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Tested by search_test.cc, exhaustive_test.cc, tester.cc
--//
--// Prog::BadSearchBacktrack is a backtracking regular expression search,
--// except that it remembers where it has been, trading a lot of
--// memory for a lot of time. It exists only for testing purposes.
--//
--// Let me repeat that.
--//
--// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
--// - It uses a ton of memory.
--// - It uses a ton of stack.
--// - It uses CHECK and LOG(FATAL).
--// - It implements unanchored search by repeated anchored search.
--//
--// On the other hand, it is very simple and a good reference
--// implementation for the more complicated regexp packages.
--//
--// In BUILD, this file is linked into the ":testing" library,
--// not the main library, in order to make it harder to pick up
--// accidentally.
--
--#include "util/util.h"
--#include "re2/prog.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--// Backtracker holds the state for a backtracking search.
--//
--// Excluding the search parameters, the main search state
--// is just the "capture registers", which record, for the
--// current execution, the string position at which each
--// parenthesis was passed. cap_[0] and cap_[1] are the
--// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
--//
--// To avoid infinite loops during backtracking on expressions
--// like (a*)*, the visited_[] bitmap marks the (state, string-position)
--// pairs that have already been explored and are thus not worth
--// re-exploring if we get there via another path. Modern backtracking
--// libraries engineer their program representation differently, to make
--// such infinite loops possible to avoid without keeping a giant visited_
--// bitmap, but visited_ works fine for a reference implementation
--// and it has the nice benefit of making the search run in linear time.
--class Backtracker {
-- public:
-- explicit Backtracker(Prog* prog);
-- ~Backtracker();
--
-- bool Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch);
--
-- private:
-- // Explores from instruction ip at string position p looking for a match.
-- // Returns true if found (so that caller can stop trying other possibilities).
-- bool Visit(int id, const char* p);
--
-- // Search parameters
-- Prog* prog_; // program being run
-- StringPiece text_; // text being searched
-- StringPiece context_; // greater context of text being searched
-- bool anchored_; // whether search is anchored at text.begin()
-- bool longest_; // whether search wants leftmost-longest match
-- bool endmatch_; // whether search must end at text.end()
-- StringPiece *submatch_; // submatches to fill in
-- int nsubmatch_; // # of submatches to fill in
--
-- // Search state
-- const char* cap_[64]; // capture registers
-- uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked
-- int nvisited_; // # of words in bitmap
--};
--
--Backtracker::Backtracker(Prog* prog)
-- : prog_(prog),
-- anchored_(false),
-- longest_(false),
-- endmatch_(false),
-- submatch_(NULL),
-- nsubmatch_(0),
-- visited_(NULL),
-- nvisited_(0) {
--}
--
--Backtracker::~Backtracker() {
-- delete[] visited_;
--}
--
--// Runs a backtracking search.
--bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
-- bool anchored, bool longest,
-- StringPiece* submatch, int nsubmatch) {
-- text_ = text;
-- context_ = context;
-- if (context_.begin() == NULL)
-- context_ = text;
-- if (prog_->anchor_start() && text.begin() > context_.begin())
-- return false;
-- if (prog_->anchor_end() && text.end() < context_.end())
-- return false;
-- anchored_ = anchored | prog_->anchor_start();
-- longest_ = longest | prog_->anchor_end();
-- endmatch_ = prog_->anchor_end();
-- submatch_ = submatch;
-- nsubmatch_ = nsubmatch;
-- CHECK(2*nsubmatch_ < arraysize(cap_));
-- memset(cap_, 0, sizeof cap_);
--
-- // We use submatch_[0] for our own bookkeeping,
-- // so it had better exist.
-- StringPiece sp0;
-- if (nsubmatch < 1) {
-- submatch_ = &sp0;
-- nsubmatch_ = 1;
-- }
-- submatch_[0] = NULL;
--
-- // Allocate new visited_ bitmap -- size is proportional
-- // to text, so have to reallocate on each call to Search.
-- delete[] visited_;
-- nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
-- visited_ = new uint32[nvisited_];
-- memset(visited_, 0, nvisited_*sizeof visited_[0]);
--
-- // Anchored search must start at text.begin().
-- if (anchored_) {
-- cap_[0] = text.begin();
-- return Visit(prog_->start(), text.begin());
-- }
--
-- // Unanchored search, starting from each possible text position.
-- // Notice that we have to try the empty string at the end of
-- // the text, so the loop condition is p <= text.end(), not p < text.end().
-- for (const char* p = text.begin(); p <= text.end(); p++) {
-- cap_[0] = p;
-- if (Visit(prog_->start(), p)) // Match must be leftmost; done.
-- return true;
-- }
-- return false;
--}
--
--// Explores from instruction ip at string position p looking for a match.
--// Return true if found (so that caller can stop trying other possibilities).
--bool Backtracker::Visit(int id, const char* p) {
-- // Check bitmap. If we've already explored from here,
-- // either it didn't match or it did but we're hoping for a better match.
-- // Either way, don't go down that road again.
-- CHECK(p <= text_.end());
-- int n = id*(text_.size()+1) + (p - text_.begin());
-- CHECK_LT(n/32, nvisited_);
-- if (visited_[n/32] & (1 << (n&31)))
-- return false;
-- visited_[n/32] |= 1 << (n&31);
--
-- // Pick out byte at current position. If at end of string,
-- // have to explore in hope of finishing a match. Use impossible byte -1.
-- int c = -1;
-- if (p < text_.end())
-- c = *p & 0xFF;
--
-- Prog::Inst* ip = prog_->inst(id);
-- switch (ip->opcode()) {
-- default:
-- LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
-- return false; // not reached
--
-- case kInstAlt:
-- case kInstAltMatch:
-- // Try both possible next states: out is preferred to out1.
-- if (Visit(ip->out(), p)) {
-- if (longest_)
-- Visit(ip->out1(), p);
-- return true;
-- }
-- return Visit(ip->out1(), p);
--
-- case kInstByteRange:
-- if (ip->Matches(c))
-- return Visit(ip->out(), p+1);
-- return false;
--
-- case kInstCapture:
-- if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
-- // Capture p to register, but save old value.
-- const char* q = cap_[ip->cap()];
-- cap_[ip->cap()] = p;
-- bool ret = Visit(ip->out(), p);
-- // Restore old value as we backtrack.
-- cap_[ip->cap()] = q;
-- return ret;
-- }
-- return Visit(ip->out(), p);
--
-- case kInstEmptyWidth:
-- if (ip->empty() & ~Prog::EmptyFlags(context_, p))
-- return false;
-- return Visit(ip->out(), p);
--
-- case kInstNop:
-- return Visit(ip->out(), p);
--
-- case kInstMatch:
-- // We found a match. If it's the best so far, record the
-- // parameters in the caller's submatch_ array.
-- if (endmatch_ && p != context_.end())
-- return false;
-- cap_[1] = p;
-- if (submatch_[0].data() == NULL || // First match so far ...
-- (longest_ && p > submatch_[0].end())) { // ... or better match
-- for (int i = 0; i < nsubmatch_; i++)
-- submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
-- }
-- return true;
--
-- case kInstFail:
-- return false;
-- }
--}
--
--// Runs a backtracking search.
--bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
-- const StringPiece& context,
-- Anchor anchor,
-- MatchKind kind,
-- StringPiece* match,
-- int nmatch) {
-- // If full match, we ask for an anchored longest match
-- // and then check that match[0] == text.
-- // So make sure match[0] exists.
-- StringPiece sp0;
-- if (kind == kFullMatch) {
-- anchor = kAnchored;
-- if (nmatch < 1) {
-- match = &sp0;
-- nmatch = 1;
-- }
-- }
--
-- // Run the search.
-- Backtracker b(this);
-- bool anchored = anchor == kAnchored;
-- bool longest = kind != kFirstMatch;
-- if (!b.Search(text, context, anchored, longest, match, nmatch))
-- return false;
-- if (kind == kFullMatch && match[0].end() != text.end())
-- return false;
-- return true;
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/charclass_test.cc b/re2/re2/testing/charclass_test.cc
-deleted file mode 100644
-index a3764d4..0000000
---- a/re2/re2/testing/charclass_test.cc
-+++ /dev/null
-@@ -1,223 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test character class manipulations.
--
--#include "util/test.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct CCTest {
-- struct {
-- Rune lo;
-- Rune hi;
-- } add[10];
-- int remove;
-- struct {
-- Rune lo;
-- Rune hi;
-- } final[10];
--};
--
--static CCTest tests[] = {
-- { { { 10, 20 }, {-1} }, -1,
-- { { 10, 20 }, {-1} } },
--
-- { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
-- { { 10, 30 }, {-1} } },
--
-- { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
-- { { 10, 40 }, {-1} } },
--
-- { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
-- { { 0, 50 }, {-1} } },
--
-- { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
-- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
--
-- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
-- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
--
-- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
-- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
--
-- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
-- { { 5, 25 }, {-1} } },
--
-- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
-- { { 10, 23 }, {-1} } },
--
-- // These check boundary cases during negation.
-- { { { 0, Runemax }, {-1} }, -1,
-- { { 0, Runemax }, {-1} } },
--
-- { { { 0, 50 }, {-1} }, -1,
-- { { 0, 50 }, {-1} } },
--
-- { { { 50, Runemax }, {-1} }, -1,
-- { { 50, Runemax }, {-1} } },
--
-- // Check RemoveAbove.
-- { { { 50, Runemax }, {-1} }, 255,
-- { { 50, 255 }, {-1} } },
--
-- { { { 50, Runemax }, {-1} }, 65535,
-- { { 50, 65535 }, {-1} } },
--
-- { { { 50, Runemax }, {-1} }, Runemax,
-- { { 50, Runemax }, {-1} } },
--
-- { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
-- { { 50, 60 }, { 250, 255 }, {-1} } },
--
-- { { { 50, 60 }, {-1} }, 255,
-- { { 50, 60 }, {-1} } },
--
-- { { { 350, 360 }, {-1} }, 255,
-- { {-1} } },
--
-- { { {-1} }, 255,
-- { {-1} } },
--};
--
--template<class CharClass>
--static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
-- if (t == NULL) {
-- printf("\t%s:", desc);
-- } else {
-- printf("\n");
-- printf("CharClass added: [%s]", desc);
-- for (int k = 0; t->add[k].lo >= 0; k++)
-- printf(" %d-%d", t->add[k].lo, t->add[k].hi);
-- printf("\n");
-- if (t->remove >= 0)
-- printf("Removed > %d\n", t->remove);
-- printf("\twant:");
-- for (int k = 0; t->final[k].lo >= 0; k++)
-- printf(" %d-%d", t->final[k].lo, t->final[k].hi);
-- printf("\n");
-- printf("\thave:");
-- }
--
-- for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
-- printf(" %d-%d", it->lo, it->hi);
-- printf("\n");
--}
--
--bool ShouldContain(CCTest *t, int x) {
-- for (int j = 0; t->final[j].lo >= 0; j++)
-- if (t->final[j].lo <= x && x <= t->final[j].hi)
-- return true;
-- return false;
--}
--
--// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
--
--CharClass* Negate(CharClass *cc) {
-- return cc->Negate();
--}
--
--void Delete(CharClass* cc) {
-- cc->Delete();
--}
--
--CharClassBuilder* Negate(CharClassBuilder* cc) {
-- CharClassBuilder* ncc = cc->Copy();
-- ncc->Negate();
-- return ncc;
--}
--
--void Delete(CharClassBuilder* cc) {
-- delete cc;
--}
--
--template<class CharClass>
--bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
-- typename CharClass::iterator it = cc->begin();
-- int size = 0;
-- for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
-- if (it == cc->end() ||
-- it->lo != t->final[j].lo ||
-- it->hi != t->final[j].hi) {
-- Broke(desc, t, cc);
-- return false;
-- }
-- size += it->hi - it->lo + 1;
-- }
-- if (it != cc->end()) {
-- Broke(desc, t, cc);
-- return false;
-- }
-- if (cc->size() != size) {
-- Broke(desc, t, cc);
-- printf("wrong size: want %d have %d\n", size, cc->size());
-- return false;
-- }
--
-- for (int j = 0; j < 101; j++) {
-- if (j == 100)
-- j = Runemax;
-- if (ShouldContain(t, j) != cc->Contains(j)) {
-- Broke(desc, t, cc);
-- printf("want contains(%d)=%d, got %d\n",
-- j, ShouldContain(t, j), cc->Contains(j));
-- return false;
-- }
-- }
--
-- CharClass* ncc = Negate(cc);
-- for (int j = 0; j < 101; j++) {
-- if (j == 100)
-- j = Runemax;
-- if (ShouldContain(t, j) == ncc->Contains(j)) {
-- Broke(desc, t, cc);
-- Broke("ncc", NULL, ncc);
-- printf("want ncc contains(%d)!=%d, got %d\n",
-- j, ShouldContain(t, j), ncc->Contains(j));
-- Delete(ncc);
-- return false;
-- }
-- if (ncc->size() != Runemax+1 - cc->size()) {
-- Broke(desc, t, cc);
-- Broke("ncc", NULL, ncc);
-- printf("ncc size should be %d is %d\n",
-- Runemax+1 - cc->size(), ncc->size());
-- Delete(ncc);
-- return false;
-- }
-- }
-- Delete(ncc);
-- return true;
--}
--
--TEST(TestCharClassBuilder, Adds) {
-- int nfail = 0;
-- for (int i = 0; i < arraysize(tests); i++) {
-- CharClassBuilder ccb;
-- CCTest* t = &tests[i];
-- for (int j = 0; t->add[j].lo >= 0; j++)
-- ccb.AddRange(t->add[j].lo, t->add[j].hi);
-- if (t->remove >= 0)
-- ccb.RemoveAbove(t->remove);
-- if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
-- nfail++;
-- CharClass* cc = ccb.GetCharClass();
-- if (!CorrectCC(cc, t, "before copy (CharClass)"))
-- nfail++;
-- cc->Delete();
--
-- CharClassBuilder *ccb1 = ccb.Copy();
-- if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
-- nfail++;
-- cc = ccb.GetCharClass();
-- if (!CorrectCC(cc, t, "after copy (CharClass)"))
-- nfail++;
-- cc->Delete();
-- delete ccb1;
-- }
-- EXPECT_EQ(nfail, 0);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/compile_test.cc b/re2/re2/testing/compile_test.cc
-deleted file mode 100644
-index 8d92105..0000000
---- a/re2/re2/testing/compile_test.cc
-+++ /dev/null
-@@ -1,171 +0,0 @@
--// Copyright 2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test prog.cc, compile.cc
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/regexp.h"
--#include "re2/prog.h"
--
--DEFINE_string(show, "", "regular expression to compile and dump");
--
--namespace re2 {
--
--// Simple input/output tests checking that
--// the regexp compiles to the expected code.
--// These are just to sanity check the basic implementation.
--// The real confidence tests happen by testing the NFA/DFA
--// that run the compiled code.
--
--struct Test {
-- const char* regexp;
-- const char* code;
--};
--
--static Test tests[] = {
-- { "a",
-- "1. byte [61-61] -> 2\n"
-- "2. match! 0\n" },
-- { "ab",
-- "1. byte [61-61] -> 2\n"
-- "2. byte [62-62] -> 3\n"
-- "3. match! 0\n" },
-- { "a|c",
-- "3. alt -> 1 | 2\n"
-- "1. byte [61-61] -> 4\n"
-- "2. byte [63-63] -> 4\n"
-- "4. match! 0\n" },
-- { "a|b",
-- "1. byte [61-62] -> 2\n"
-- "2. match! 0\n" },
-- { "[ab]",
-- "1. byte [61-62] -> 2\n"
-- "2. match! 0\n" },
-- { "a+",
-- "1. byte [61-61] -> 2\n"
-- "2. alt -> 1 | 3\n"
-- "3. match! 0\n" },
-- { "a+?",
-- "1. byte [61-61] -> 2\n"
-- "2. alt -> 3 | 1\n"
-- "3. match! 0\n" },
-- { "a*",
-- "2. alt -> 1 | 3\n"
-- "1. byte [61-61] -> 2\n"
-- "3. match! 0\n" },
-- { "a*?",
-- "2. alt -> 3 | 1\n"
-- "3. match! 0\n"
-- "1. byte [61-61] -> 2\n" },
-- { "a?",
-- "2. alt -> 1 | 3\n"
-- "1. byte [61-61] -> 3\n"
-- "3. match! 0\n" },
-- { "a??",
-- "2. alt -> 3 | 1\n"
-- "3. match! 0\n"
-- "1. byte [61-61] -> 3\n" },
-- { "a{4}",
-- "1. byte [61-61] -> 2\n"
-- "2. byte [61-61] -> 3\n"
-- "3. byte [61-61] -> 4\n"
-- "4. byte [61-61] -> 5\n"
-- "5. match! 0\n" },
-- { "(a)",
-- "2. capture 2 -> 1\n"
-- "1. byte [61-61] -> 3\n"
-- "3. capture 3 -> 4\n"
-- "4. match! 0\n" },
-- { "(?:a)",
-- "1. byte [61-61] -> 2\n"
-- "2. match! 0\n" },
-- { "",
-- "2. match! 0\n" },
-- { ".",
-- "3. alt -> 1 | 2\n"
-- "1. byte [00-09] -> 4\n"
-- "2. byte [0b-ff] -> 4\n"
-- "4. match! 0\n" },
-- { "[^ab]",
-- "5. alt -> 3 | 4\n"
-- "3. alt -> 1 | 2\n"
-- "4. byte [63-ff] -> 6\n"
-- "1. byte [00-09] -> 6\n"
-- "2. byte [0b-60] -> 6\n"
-- "6. match! 0\n" },
-- { "[Aa]",
-- "1. byte/i [61-61] -> 2\n"
-- "2. match! 0\n" },
--};
--
--TEST(TestRegexpCompileToProg, Simple) {
-- int failed = 0;
-- for (int i = 0; i < arraysize(tests); i++) {
-- const re2::Test& t = tests[i];
-- Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
-- if (re == NULL) {
-- LOG(ERROR) << "Cannot parse: " << t.regexp;
-- failed++;
-- continue;
-- }
-- Prog* prog = re->CompileToProg(0);
-- if (prog == NULL) {
-- LOG(ERROR) << "Cannot compile: " << t.regexp;
-- re->Decref();
-- failed++;
-- continue;
-- }
-- CHECK(re->CompileToProg(1) == NULL);
-- string s = prog->Dump();
-- if (s != t.code) {
-- LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
-- LOG(ERROR) << "Want:\n" << t.code;
-- LOG(ERROR) << "Got:\n" << s;
-- failed++;
-- }
-- delete prog;
-- re->Decref();
-- }
-- EXPECT_EQ(failed, 0);
--}
--
--// The distinct byte ranges involved in the UTF-8 dot ([^\n]).
--// Once, erroneously split between 0x3f and 0x40 because it is
--// a 6-bit boundary.
--static struct UTF8ByteRange {
-- int lo;
-- int hi;
--} utf8ranges[] = {
-- { 0x00, 0x09 },
-- { 0x0A, 0x0A },
-- { 0x10, 0x7F },
-- { 0x80, 0x8F },
-- { 0x90, 0x9F },
-- { 0xA0, 0xBF },
-- { 0xC0, 0xC1 },
-- { 0xC2, 0xDF },
-- { 0xE0, 0xE0 },
-- { 0xE1, 0xEF },
-- { 0xF0, 0xF0 },
-- { 0xF1, 0xF3 },
-- { 0xF4, 0xF4 },
-- { 0xF5, 0xFF },
--};
--
--TEST(TestCompile, ByteRanges) {
-- Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL);
-- EXPECT_TRUE(re != NULL);
-- Prog* prog = re->CompileToProg(0);
-- EXPECT_TRUE(prog != NULL);
-- EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges));
-- for (int i = 0; i < arraysize(utf8ranges); i++)
-- for (int j = utf8ranges[i].lo; j <= utf8ranges[i].hi; j++)
-- EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j;
-- delete prog;
-- re->Decref();
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/dfa_test.cc b/re2/re2/testing/dfa_test.cc
-deleted file mode 100644
-index 8699e2e..0000000
---- a/re2/re2/testing/dfa_test.cc
-+++ /dev/null
-@@ -1,343 +0,0 @@
--// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/test.h"
--#include "util/thread.h"
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--#include "re2/testing/regexp_generator.h"
--#include "re2/testing/string_generator.h"
--
--DECLARE_bool(re2_dfa_bail_when_slow);
--
--DEFINE_int32(size, 8, "log2(number of DFA nodes)");
--DEFINE_int32(repeat, 2, "Repetition count.");
--DEFINE_int32(threads, 4, "number of threads");
--
--namespace re2 {
--
--// Check that multithreaded access to DFA class works.
--
--// Helper thread: builds entire DFA for prog.
--class BuildThread : public Thread {
-- public:
-- BuildThread(Prog* prog) : prog_(prog) {}
-- virtual void Run() {
-- CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));
-- }
--
-- private:
-- Prog* prog_;
--};
--
--TEST(Multithreaded, BuildEntireDFA) {
-- // Create regexp with 2^FLAGS_size states in DFA.
-- string s = "a";
-- for (int i = 0; i < FLAGS_size; i++)
-- s += "[ab]";
-- s += "b";
--
-- // Check that single-threaded code works.
-- {
-- //LOG(INFO) << s;
-- Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- BuildThread* t = new BuildThread(prog);
-- t->SetJoinable(true);
-- t->Start();
-- t->Join();
-- delete t;
-- delete prog;
-- re->Decref();
-- }
--
-- // Build the DFA simultaneously in a bunch of threads.
-- for (int i = 0; i < FLAGS_repeat; i++) {
-- Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
--
-- vector<BuildThread*> threads;
-- for (int j = 0; j < FLAGS_threads; j++) {
-- BuildThread *t = new BuildThread(prog);
-- t->SetJoinable(true);
-- threads.push_back(t);
-- }
-- for (int j = 0; j < FLAGS_threads; j++)
-- threads[j]->Start();
-- for (int j = 0; j < FLAGS_threads; j++) {
-- threads[j]->Join();
-- delete threads[j];
-- }
--
-- // One more compile, to make sure everything is okay.
-- prog->BuildEntireDFA(Prog::kFirstMatch);
-- delete prog;
-- re->Decref();
-- }
--}
--
--// Check that DFA size requirements are followed.
--// BuildEntireDFA will, like SearchDFA, stop building out
--// the DFA once the memory limits are reached.
--TEST(SingleThreaded, BuildEntireDFA) {
-- // Create regexp with 2^30 states in DFA.
-- string s = "a";
-- for (int i = 0; i < 30; i++)
-- s += "[ab]";
-- s += "b";
--
-- //LOG(INFO) << s;
-- Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
-- CHECK(re);
-- int max = 24;
-- for (int i = 17; i < max; i++) {
-- int limit = 1<<i;
-- int usage, progusage, dfamem;
-- {
-- testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
-- Prog* prog = re->CompileToProg(limit);
-- CHECK(prog);
-- progusage = m.HeapGrowth();
-- dfamem = prog->dfa_mem();
-- prog->BuildEntireDFA(Prog::kFirstMatch);
-- prog->BuildEntireDFA(Prog::kLongestMatch);
-- usage = m.HeapGrowth();
-- delete prog;
-- }
-- if (!UsingMallocCounter)
-- continue;
-- //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
-- // limit, progusage, dfamem, usage);
-- CHECK_GT(usage, limit*9/10);
-- CHECK_LT(usage, limit + (16<<10)); // 16kB of slop okay
-- }
-- re->Decref();
--}
--
--// Generates and returns a string over binary alphabet {0,1} that contains
--// all possible binary sequences of length n as subsequences. The obvious
--// brute force method would generate a string of length n * 2^n, but this
--// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
--// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
--// Such a string is useful for testing a DFA. If you have a DFA
--// where distinct last n bytes implies distinct states, then running on a
--// DeBruijn string causes the DFA to need to create a new state at every
--// position in the input, never reusing any states until it gets to the
--// end of the string. This is the worst possible case for DFA execution.
--static string DeBruijnString(int n) {
-- CHECK_LT(n, 8*sizeof(int));
-- CHECK_GT(n, 0);
--
-- vector<bool> did(1<<n);
-- for (int i = 0; i < 1<<n; i++)
-- did[i] = false;
--
-- string s;
-- for (int i = 0; i < n-1; i++)
-- s.append("0");
-- int bits = 0;
-- int mask = (1<<n) - 1;
-- for (int i = 0; i < (1<<n); i++) {
-- bits <<= 1;
-- bits &= mask;
-- if (!did[bits|1]) {
-- bits |= 1;
-- s.append("1");
-- } else {
-- s.append("0");
-- }
-- CHECK(!did[bits]);
-- did[bits] = true;
-- }
-- return s;
--}
--
--// Test that the DFA gets the right result even if it runs
--// out of memory during a search. The regular expression
--// 0[01]{n}$ matches a binary string of 0s and 1s only if
--// the (n+1)th-to-last character is a 0. Matching this in
--// a single forward pass (as done by the DFA) requires
--// keeping one bit for each of the last n+1 characters
--// (whether each was a 0), or 2^(n+1) possible states.
--// If we run this regexp to search in a string that contains
--// every possible n-character binary string as a substring,
--// then it will have to run through at least 2^n states.
--// States are big data structures -- certainly more than 1 byte --
--// so if the DFA can search correctly while staying within a
--// 2^n byte limit, it must be handling out-of-memory conditions
--// gracefully.
--TEST(SingleThreaded, SearchDFA) {
-- // Choice of n is mostly arbitrary, except that:
-- // * making n too big makes the test run for too long.
-- // * making n too small makes the DFA refuse to run,
-- // because it has so little memory compared to the program size.
-- // Empirically, n = 18 is a good compromise between the two.
-- const int n = 18;
--
-- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
-- Regexp::LikePerl, NULL);
-- CHECK(re);
--
-- // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
-- // which is not a match for 0[01]{n}$. Adding one more 0 is a match.
-- string no_match = DeBruijnString(n);
-- string match = no_match + "0";
--
-- // The De Bruijn string is the worst case input for this regexp.
-- // By default, the DFA will notice that it is flushing its cache
-- // too frequently and will bail out early, so that RE2 can use the
-- // NFA implementation instead. (The DFA loses its speed advantage
-- // if it can't get a good cache hit rate.)
-- // Tell the DFA to trudge along instead.
-- FLAGS_re2_dfa_bail_when_slow = false;
--
-- int64 usage;
-- int64 peak_usage;
-- {
-- testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
-- Prog* prog = re->CompileToProg(1<<n);
-- CHECK(prog);
-- for (int i = 0; i < 10; i++) {
-- bool matched, failed = false;
-- matched = prog->SearchDFA(match, NULL,
-- Prog::kUnanchored, Prog::kFirstMatch,
-- NULL, &failed, NULL);
-- CHECK(!failed);
-- CHECK(matched);
-- matched = prog->SearchDFA(no_match, NULL,
-- Prog::kUnanchored, Prog::kFirstMatch,
-- NULL, &failed, NULL);
-- CHECK(!failed);
-- CHECK(!matched);
-- }
-- usage = m.HeapGrowth();
-- peak_usage = m.PeakHeapGrowth();
-- delete prog;
-- }
-- re->Decref();
--
-- if (!UsingMallocCounter)
-- return;
-- //LOG(INFO) << "usage " << usage << " " << peak_usage;
-- CHECK_LT(usage, 1<<n);
-- CHECK_LT(peak_usage, 1<<n);
--}
--
--// Helper thread: searches for match, which should match,
--// and no_match, which should not.
--class SearchThread : public Thread {
-- public:
-- SearchThread(Prog* prog, const StringPiece& match,
-- const StringPiece& no_match)
-- : prog_(prog), match_(match), no_match_(no_match) {}
--
-- virtual void Run() {
-- for (int i = 0; i < 2; i++) {
-- bool matched, failed = false;
-- matched = prog_->SearchDFA(match_, NULL,
-- Prog::kUnanchored, Prog::kFirstMatch,
-- NULL, &failed, NULL);
-- CHECK(!failed);
-- CHECK(matched);
-- matched = prog_->SearchDFA(no_match_, NULL,
-- Prog::kUnanchored, Prog::kFirstMatch,
-- NULL, &failed, NULL);
-- CHECK(!failed);
-- CHECK(!matched);
-- }
-- }
--
-- private:
-- Prog* prog_;
-- StringPiece match_;
-- StringPiece no_match_;
--};
--
--TEST(Multithreaded, SearchDFA) {
-- // Same as single-threaded test above.
-- const int n = 18;
-- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
-- Regexp::LikePerl, NULL);
-- CHECK(re);
-- string no_match = DeBruijnString(n);
-- string match = no_match + "0";
-- FLAGS_re2_dfa_bail_when_slow = false;
--
-- // Check that single-threaded code works.
-- {
-- Prog* prog = re->CompileToProg(1<<n);
-- CHECK(prog);
-- SearchThread* t = new SearchThread(prog, match, no_match);
-- t->SetJoinable(true);
-- t->Start();
-- t->Join();
-- delete t;
-- delete prog;
-- }
--
-- // Run the search simultaneously in a bunch of threads.
-- // Reuse same flags for Multithreaded.BuildDFA above.
-- for (int i = 0; i < FLAGS_repeat; i++) {
-- //LOG(INFO) << "Search " << i;
-- Prog* prog = re->CompileToProg(1<<n);
-- CHECK(prog);
--
-- vector<SearchThread*> threads;
-- for (int j = 0; j < FLAGS_threads; j++) {
-- SearchThread *t = new SearchThread(prog, match, no_match);
-- t->SetJoinable(true);
-- threads.push_back(t);
-- }
-- for (int j = 0; j < FLAGS_threads; j++)
-- threads[j]->Start();
-- for (int j = 0; j < FLAGS_threads; j++) {
-- threads[j]->Join();
-- delete threads[j];
-- }
-- delete prog;
-- }
-- re->Decref();
--}
--
--struct ReverseTest {
-- const char *regexp;
-- const char *text;
-- bool match;
--};
--
--// Test that reverse DFA handles anchored/unanchored correctly.
--// It's in the DFA interface but not used by RE2.
--ReverseTest reverse_tests[] = {
-- { "\\A(a|b)", "abc", true },
-- { "(a|b)\\z", "cba", true },
-- { "\\A(a|b)", "cba", false },
-- { "(a|b)\\z", "abc", false },
--};
--
--TEST(DFA, ReverseMatch) {
-- int nfail = 0;
-- for (int i = 0; i < arraysize(reverse_tests); i++) {
-- const ReverseTest& t = reverse_tests[i];
-- Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog *prog = re->CompileToReverseProg(0);
-- CHECK(prog);
-- bool failed = false;
-- bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL);
-- if (matched != t.match) {
-- LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
-- nfail++;
-- }
-- delete prog;
-- re->Decref();
-- }
-- EXPECT_EQ(nfail, 0);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/dump.cc b/re2/re2/testing/dump.cc
-deleted file mode 100644
-index 4bdf714..0000000
---- a/re2/re2/testing/dump.cc
-+++ /dev/null
-@@ -1,164 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Dump the regexp into a string showing structure.
--// Tested by parse_unittest.cc
--
--// This function traverses the regexp recursively,
--// meaning that on inputs like Regexp::Simplify of
--// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
--// it takes time and space exponential in the size of the
--// original regular expression. It can also use stack space
--// linear in the size of the regular expression for inputs
--// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
--// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
--// As a result, Dump is provided only in the testing
--// library (see BUILD).
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/stringpiece.h"
--#include "re2/regexp.h"
--
--// Cause a link error if this file is used outside of testing.
--DECLARE_string(test_tmpdir);
--
--namespace re2 {
--
--static const char* kOpcodeNames[] = {
-- "bad",
-- "no",
-- "emp",
-- "lit",
-- "str",
-- "cat",
-- "alt",
-- "star",
-- "plus",
-- "que",
-- "rep",
-- "cap",
-- "dot",
-- "byte",
-- "bol",
-- "eol",
-- "wb", // kRegexpWordBoundary
-- "nwb", // kRegexpNoWordBoundary
-- "bot",
-- "eot",
-- "cc",
-- "match",
--};
--
--// Create string representation of regexp with explicit structure.
--// Nothing pretty, just for testing.
--static void DumpRegexpAppending(Regexp* re, string* s) {
-- if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
-- StringAppendF(s, "op%d", re->op());
-- } else {
-- switch (re->op()) {
-- default:
-- break;
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- case kRegexpRepeat:
-- if (re->parse_flags() & Regexp::NonGreedy)
-- s->append("n");
-- break;
-- }
-- s->append(kOpcodeNames[re->op()]);
-- if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
-- Rune r = re->rune();
-- if ('a' <= r && r <= 'z')
-- s->append("fold");
-- }
-- if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
-- for (int i = 0; i < re->nrunes(); i++) {
-- Rune r = re->runes()[i];
-- if ('a' <= r && r <= 'z') {
-- s->append("fold");
-- break;
-- }
-- }
-- }
-- }
-- s->append("{");
-- switch (re->op()) {
-- default:
-- break;
-- case kRegexpEndText:
-- if (!(re->parse_flags() & Regexp::WasDollar)) {
-- s->append("\\z");
-- }
-- break;
-- case kRegexpLiteral: {
-- Rune r = re->rune();
-- char buf[UTFmax+1];
-- buf[runetochar(buf, &r)] = 0;
-- s->append(buf);
-- break;
-- }
-- case kRegexpLiteralString:
-- for (int i = 0; i < re->nrunes(); i++) {
-- Rune r = re->runes()[i];
-- char buf[UTFmax+1];
-- buf[runetochar(buf, &r)] = 0;
-- s->append(buf);
-- }
-- break;
-- case kRegexpConcat:
-- case kRegexpAlternate:
-- for (int i = 0; i < re->nsub(); i++)
-- DumpRegexpAppending(re->sub()[i], s);
-- break;
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- DumpRegexpAppending(re->sub()[0], s);
-- break;
-- case kRegexpCapture:
-- if (re->name()) {
-- s->append(*re->name());
-- s->append(":");
-- }
-- DumpRegexpAppending(re->sub()[0], s);
-- break;
-- case kRegexpRepeat:
-- s->append(StringPrintf("%d,%d ", re->min(), re->max()));
-- DumpRegexpAppending(re->sub()[0], s);
-- break;
-- case kRegexpCharClass: {
-- string sep;
-- for (CharClass::iterator it = re->cc()->begin();
-- it != re->cc()->end(); ++it) {
-- RuneRange rr = *it;
-- s->append(sep);
-- if (rr.lo == rr.hi)
-- s->append(StringPrintf("%#x", rr.lo));
-- else
-- s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
-- sep = " ";
-- }
-- break;
-- }
-- }
-- s->append("}");
--}
--
--string Regexp::Dump() {
-- string s;
--
-- // Make sure being called from a unit test.
-- if (FLAGS_test_tmpdir.empty()) {
-- LOG(ERROR) << "Cannot use except for testing.";
-- return s;
-- }
--
-- DumpRegexpAppending(this, &s);
-- return s;
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/exhaustive1_test.cc b/re2/re2/testing/exhaustive1_test.cc
-deleted file mode 100644
-index 9e057cc..0000000
---- a/re2/re2/testing/exhaustive1_test.cc
-+++ /dev/null
-@@ -1,42 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Exhaustive testing of regular expression matching.
--
--#include "util/test.h"
--#include "re2/testing/exhaustive_tester.h"
--
--DECLARE_string(regexp_engines);
--
--namespace re2 {
--
--// Test simple repetition operators
--TEST(Repetition, Simple) {
-- vector<string> ops = Split(" ",
-- "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
-- "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
-- "%s* %s+ %s? %s*? %s+? %s??");
-- ExhaustiveTest(3, 2, Explode("abc."), ops,
-- 6, Explode("ab"), "(?:%s)", "");
-- ExhaustiveTest(3, 2, Explode("abc."), ops,
-- 40, Explode("a"), "(?:%s)", "");
--}
--
--// Test capturing parens -- (a) -- inside repetition operators
--TEST(Repetition, Capturing) {
-- vector<string> ops = Split(" ",
-- "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
-- "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
-- "%s* %s+ %s? %s*? %s+? %s??");
-- ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
-- 7, Explode("ab"), "(?:%s)", "");
--
-- // This would be a great test, but it runs forever when PCRE is enabled.
-- if (strstr("PCRE", FLAGS_regexp_engines.c_str()) == NULL)
-- ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops,
-- 100, Explode("a"), "(?:%s)", "");
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/exhaustive2_test.cc b/re2/re2/testing/exhaustive2_test.cc
-deleted file mode 100644
-index c5fec5b..0000000
---- a/re2/re2/testing/exhaustive2_test.cc
-+++ /dev/null
-@@ -1,70 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Exhaustive testing of regular expression matching.
--
--#include "util/test.h"
--#include "re2/re2.h"
--#include "re2/testing/exhaustive_tester.h"
--
--DECLARE_string(regexp_engines);
--
--namespace re2 {
--
--// Test empty string matches (aka "(?:)")
--TEST(EmptyString, Exhaustive) {
-- ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
-- RegexpGenerator::EgrepOps(),
-- 5, Split("", "ab"), "", "");
--}
--
--// Test escaped versions of regexp syntax.
--TEST(Punctuation, Literals) {
-- vector<string> alphabet = Explode("()*+?{}[]\\^$.");
-- vector<string> escaped = alphabet;
-- for (int i = 0; i < escaped.size(); i++)
-- escaped[i] = "\\" + escaped[i];
-- ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
-- 2, alphabet, "", "");
--}
--
--// Test ^ $ . \A \z in presence of line endings.
--// Have to wrap the empty-width ones in (?:) so that
--// they can be repeated -- PCRE rejects ^* but allows (?:^)*
--TEST(LineEnds, Exhaustive) {
-- ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
-- RegexpGenerator::EgrepOps(),
-- 4, Explode("ab\n"), "", "");
--}
--
--// Test what does and does not match \n.
--// This would be a good test, except that PCRE seems to have a bug:
--// in single-byte character set mode (the default),
--// [^a] matches \n, but in UTF-8 mode it does not.
--// So when we run the test, the tester complains that
--// we don't agree with PCRE, but it's PCRE that is at fault.
--// For what it's worth, Perl gets this right (matches
--// regardless of whether UTF-8 input is selected):
--//
--// #!/usr/bin/perl
--// use POSIX qw(locale_h);
--// print "matches in latin1\n" if "\n" =~ /[^a]/;
--// setlocale("en_US.utf8");
--// print "matches in utf8\n" if "\n" =~ /[^a]/;
--//
--// The rule chosen for RE2 is that by default, like Perl,
--// dot does not match \n but negated character classes [^a] do.
--// (?s) will allow dot to match \n; there is no way in RE2
--// to stop [^a] from matching \n, though the underlying library
--// provides a mechanism, and RE2 could add new syntax if needed.
--//
--// TEST(Newlines, Exhaustive) {
--// vector<string> empty_vector;
--// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
--// RegexpGenerator::EgrepOps(),
--// 4, Explode("a\n"), "");
--// }
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/exhaustive3_test.cc b/re2/re2/testing/exhaustive3_test.cc
-deleted file mode 100644
-index 5613fcb..0000000
---- a/re2/re2/testing/exhaustive3_test.cc
-+++ /dev/null
-@@ -1,94 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Exhaustive testing of regular expression matching.
--
--#include "util/test.h"
--#include "re2/testing/exhaustive_tester.h"
--
--namespace re2 {
--
--// Test simple character classes by themselves.
--TEST(CharacterClasses, Exhaustive) {
-- vector<string> atoms = Split(" ",
-- "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
-- ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
-- 5, Explode("ab"), "", "");
--}
--
--// Test simple character classes inside a___b (for example, a[a]b).
--TEST(CharacterClasses, ExhaustiveAB) {
-- vector<string> atoms = Split(" ",
-- "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
-- ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
-- 5, Explode("ab"), "a%sb", "");
--}
--
--// Returns UTF8 for Rune r
--static string UTF8(Rune r) {
-- char buf[UTFmax+1];
-- buf[runetochar(buf, &r)] = 0;
-- return string(buf);
--}
--
--// Returns a vector of "interesting" UTF8 characters.
--// Unicode is now too big to just return all of them,
--// so UTF8Characters return a set likely to be good test cases.
--static const vector<string>& InterestingUTF8() {
-- static bool init;
-- static vector<string> v;
--
-- if (init)
-- return v;
--
-- init = true;
-- // All the Latin1 equivalents are interesting.
-- for (int i = 1; i < 256; i++)
-- v.push_back(UTF8(i));
--
-- // After that, the codes near bit boundaries are
-- // interesting, because they span byte sequence lengths.
-- for (int j = 0; j < 8; j++)
-- v.push_back(UTF8(256 + j));
-- for (int i = 512; i < Runemax; i <<= 1)
-- for (int j = -8; j < 8; j++)
-- v.push_back(UTF8(i + j));
--
-- // The codes near Runemax, including Runemax itself, are interesting.
-- for (int j = -8; j <= 0; j++)
-- v.push_back(UTF8(Runemax + j));
--
-- return v;
--}
--
--// Test interesting UTF-8 characters against character classes.
--TEST(InterestingUTF8, SingleOps) {
-- vector<string> atoms = Split(" ",
-- ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
-- "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
-- "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
-- "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
-- vector<string> ops; // no ops
-- ExhaustiveTest(1, 0, atoms, ops,
-- 1, InterestingUTF8(), "", "");
--}
--
--// Test interesting UTF-8 characters against character classes,
--// but wrap everything inside AB.
--TEST(InterestingUTF8, AB) {
-- vector<string> atoms = Split(" ",
-- ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
-- "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
-- "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
-- "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
-- vector<string> ops; // no ops
-- vector<string> alpha = InterestingUTF8();
-- for (int i = 0; i < alpha.size(); i++)
-- alpha[i] = "a" + alpha[i] + "b";
-- ExhaustiveTest(1, 0, atoms, ops,
-- 1, alpha, "a%sb", "");
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/exhaustive_test.cc b/re2/re2/testing/exhaustive_test.cc
-deleted file mode 100644
-index fc40dee..0000000
---- a/re2/re2/testing/exhaustive_test.cc
-+++ /dev/null
-@@ -1,38 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Exhaustive testing of regular expression matching.
--
--#include "util/test.h"
--#include "re2/testing/exhaustive_tester.h"
--
--namespace re2 {
--
--DECLARE_string(regexp_engines);
--
--// Test very simple expressions.
--TEST(EgrepLiterals, Lowercase) {
-- EgrepTest(3, 2, "abc.", 3, "abc", "");
--}
--
--// Test mixed-case expressions.
--TEST(EgrepLiterals, MixedCase) {
-- EgrepTest(3, 2, "AaBb.", 2, "AaBb", "");
--}
--
--// Test mixed-case in case-insensitive mode.
--TEST(EgrepLiterals, FoldCase) {
-- // The punctuation characters surround A-Z and a-z
-- // in the ASCII table. This looks for bugs in the
-- // bytemap range code in the DFA.
-- EgrepTest(3, 2, "abAB.", 2, "aBc@_~", "(?i:%s)");
--}
--
--// Test very simple expressions.
--TEST(EgrepLiterals, UTF8) {
-- EgrepTest(3, 2, "ab.", 4, "a\xE2\x98\xBA", "");
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/exhaustive_tester.cc b/re2/re2/testing/exhaustive_tester.cc
-deleted file mode 100644
-index 54de857..0000000
---- a/re2/re2/testing/exhaustive_tester.cc
-+++ /dev/null
-@@ -1,188 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Exhaustive testing of regular expression matching.
--
--// Each test picks an alphabet (e.g., "abc"), a maximum string length,
--// a maximum regular expression length, and a maximum number of letters
--// that can appear in the regular expression. Given these parameters,
--// it tries every possible regular expression and string, verifying that
--// the NFA, DFA, and a trivial backtracking implementation agree about
--// the location of the match.
--
--#include <stdlib.h>
--#include <stdio.h>
--
--#ifndef LOGGING
--#define LOGGING 0
--#endif
--
--#include "util/test.h"
--#include "re2/testing/exhaustive_tester.h"
--#include "re2/testing/tester.h"
--
--DEFINE_bool(show_regexps, false, "show regexps during testing");
--
--DEFINE_int32(max_bad_regexp_inputs, 1,
-- "Stop testing a regular expression after finding this many "
-- "strings that break it.");
--
--// Compiled in debug mode, the usual tests run for over an hour.
--// Have to cut it down to make the unit test machines happy.
--DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
--
--namespace re2 {
--
--static char* escape(const StringPiece& sp) {
-- static char buf[512];
-- char* p = buf;
-- *p++ = '\"';
-- for (int i = 0; i < sp.size(); i++) {
-- if(p+5 >= buf+sizeof buf)
-- LOG(FATAL) << "ExhaustiveTester escape: too long";
-- if(sp[i] == '\\' || sp[i] == '\"') {
-- *p++ = '\\';
-- *p++ = sp[i];
-- } else if(sp[i] == '\n') {
-- *p++ = '\\';
-- *p++ = 'n';
-- } else {
-- *p++ = sp[i];
-- }
-- }
-- *p++ = '\"';
-- *p = '\0';
-- return buf;
--}
--
--static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
-- if (!re.Match(input, 0, input.size(), anchor, m, n)) {
-- printf("-");
-- return;
-- }
-- for (int i = 0; i < n; i++) {
-- if (i > 0)
-- printf(" ");
-- if (m[i].begin() == NULL)
-- printf("-");
-- else
-- printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
-- }
--}
--
--// Processes a single generated regexp.
--// Compiles it using Regexp interface and PCRE, and then
--// checks that NFA, DFA, and PCRE all return the same results.
--void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
-- regexps_++;
-- string regexp = const_regexp;
-- if (!topwrapper_.empty())
-- regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
--
-- if (FLAGS_show_regexps) {
-- printf("\r%s", regexp.c_str());
-- fflush(stdout);
-- }
--
-- if (LOGGING) {
-- // Write out test cases and answers for use in testing
-- // other implementations, such as Go's regexp package.
-- if (randomstrings_)
-- LOG(ERROR) << "Cannot log with random strings.";
-- if (regexps_ == 1) { // first
-- printf("strings\n");
-- strgen_.Reset();
-- while (strgen_.HasNext())
-- printf("%s\n", escape(strgen_.Next()));
-- printf("regexps\n");
-- }
-- printf("%s\n", escape(regexp));
--
-- RE2 re(regexp);
-- RE2::Options longest;
-- longest.set_longest_match(true);
-- RE2 relongest(regexp, longest);
-- int ngroup = re.NumberOfCapturingGroups()+1;
-- StringPiece* group = new StringPiece[ngroup];
--
-- strgen_.Reset();
-- while (strgen_.HasNext()) {
-- StringPiece input = strgen_.Next();
-- PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
-- printf(";");
-- PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
-- printf(";");
-- PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
-- printf(";");
-- PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
-- printf("\n");
-- }
-- delete[] group;
-- return;
-- }
--
-- Tester tester(regexp);
-- if (tester.error())
-- return;
--
-- strgen_.Reset();
-- strgen_.GenerateNULL();
-- if (randomstrings_)
-- strgen_.Random(stringseed_, stringcount_);
-- int bad_inputs = 0;
-- while (strgen_.HasNext()) {
-- tests_++;
-- if (!tester.TestInput(strgen_.Next())) {
-- failures_++;
-- if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
-- break;
-- }
-- }
--}
--
--// Runs an exhaustive test on the given parameters.
--void ExhaustiveTest(int maxatoms, int maxops,
-- const vector<string>& alphabet,
-- const vector<string>& ops,
-- int maxstrlen, const vector<string>& stralphabet,
-- const string& wrapper,
-- const string& topwrapper) {
-- if (DEBUG_MODE && FLAGS_quick_debug_mode) {
-- if (maxatoms > 1)
-- maxatoms--;
-- if (maxops > 1)
-- maxops--;
-- if (maxstrlen > 1)
-- maxstrlen--;
-- }
-- ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
-- maxstrlen, stralphabet, wrapper,
-- topwrapper);
-- t.Generate();
-- if (!LOGGING) {
-- printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
-- t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
-- }
-- EXPECT_EQ(0, t.failures());
--}
--
--// Runs an exhaustive test using the given parameters and
--// the basic egrep operators.
--void EgrepTest(int maxatoms, int maxops, const string& alphabet,
-- int maxstrlen, const string& stralphabet,
-- const string& wrapper) {
-- const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
--
-- for (int i = 0; i < arraysize(tops); i++) {
-- ExhaustiveTest(maxatoms, maxops,
-- Split("", alphabet),
-- RegexpGenerator::EgrepOps(),
-- maxstrlen,
-- Split("", stralphabet),
-- wrapper,
-- tops[i]);
-- }
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/exhaustive_tester.h b/re2/re2/testing/exhaustive_tester.h
-deleted file mode 100644
-index 38a139f..0000000
---- a/re2/re2/testing/exhaustive_tester.h
-+++ /dev/null
-@@ -1,85 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H__
--#define RE2_TESTING_EXHAUSTIVE_TESTER_H__
--
--#include <string>
--#include <vector>
--#include "util/util.h"
--#include "re2/testing/regexp_generator.h"
--#include "re2/testing/string_generator.h"
--
--namespace re2 {
--
--// Exhaustive regular expression test: generate all regexps within parameters,
--// then generate all strings of a given length over a given alphabet,
--// then check that NFA, DFA, and PCRE agree about whether each regexp matches
--// each possible string, and if so, where the match is.
--//
--// Can also be used in a "random" mode that generates a given number
--// of random regexp and strings, allowing testing of larger expressions
--// and inputs.
--class ExhaustiveTester : public RegexpGenerator {
-- public:
-- ExhaustiveTester(int maxatoms,
-- int maxops,
-- const vector<string>& alphabet,
-- const vector<string>& ops,
-- int maxstrlen,
-- const vector<string>& stralphabet,
-- const string& wrapper,
-- const string& topwrapper)
-- : RegexpGenerator(maxatoms, maxops, alphabet, ops),
-- strgen_(maxstrlen, stralphabet),
-- wrapper_(wrapper),
-- topwrapper_(topwrapper),
-- regexps_(0), tests_(0), failures_(0),
-- randomstrings_(0), stringseed_(0), stringcount_(0) { }
--
-- int regexps() { return regexps_; }
-- int tests() { return tests_; }
-- int failures() { return failures_; }
--
-- // Needed for RegexpGenerator interface.
-- void HandleRegexp(const string& regexp);
--
-- // Causes testing to generate random input strings.
-- void RandomStrings(int32 seed, int32 count) {
-- randomstrings_ = true;
-- stringseed_ = seed;
-- stringcount_ = count;
-- }
--
-- private:
-- StringGenerator strgen_;
-- string wrapper_; // Regexp wrapper - either empty or has one %s.
-- string topwrapper_; // Regexp top-level wrapper.
-- int regexps_; // Number of HandleRegexp calls
-- int tests_; // Number of regexp tests.
-- int failures_; // Number of tests failed.
--
-- bool randomstrings_; // Whether to use random strings
-- int32 stringseed_; // If so, the seed.
-- int stringcount_; // If so, how many to generate.
-- DISALLOW_EVIL_CONSTRUCTORS(ExhaustiveTester);
--};
--
--// Runs an exhaustive test on the given parameters.
--void ExhaustiveTest(int maxatoms, int maxops,
-- const vector<string>& alphabet,
-- const vector<string>& ops,
-- int maxstrlen, const vector<string>& stralphabet,
-- const string& wrapper,
-- const string& topwrapper);
--
--// Runs an exhaustive test using the given parameters and
--// the basic egrep operators.
--void EgrepTest(int maxatoms, int maxops, const string& alphabet,
-- int maxstrlen, const string& stralphabet,
-- const string& wrapper);
--
--} // namespace re2
--
--#endif // RE2_TESTING_EXHAUSTIVE_TESTER_H__
-diff --git a/re2/re2/testing/filtered_re2_test.cc b/re2/re2/testing/filtered_re2_test.cc
-deleted file mode 100644
-index 7755d30..0000000
---- a/re2/re2/testing/filtered_re2_test.cc
-+++ /dev/null
-@@ -1,258 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/test.h"
--#include "re2/filtered_re2.h"
--#include "re2/re2.h"
--
--DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc
--
--namespace re2 {
--
--struct FilterTestVars {
-- vector<string> atoms;
-- vector<int> atom_indices;
-- vector<int> matches;
-- RE2::Options opts;
-- FilteredRE2 f;
--};
--
--TEST(FilteredRE2Test, EmptyTest) {
-- FilterTestVars v;
-- v.f.AllMatches("foo", v.atom_indices, &v.matches);
-- EXPECT_EQ(0, v.matches.size());
--}
--
--TEST(FilteredRE2Test, SmallOrTest) {
-- FLAGS_filtered_re2_min_atom_len = 4;
--
-- FilterTestVars v;
-- int id;
-- v.f.Add("(foo|bar)", v.opts, &id);
--
-- v.f.Compile(&v.atoms);
-- EXPECT_EQ(0, v.atoms.size());
--
-- v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
-- EXPECT_EQ(1, v.matches.size());
-- EXPECT_EQ(id, v.matches[0]);
--}
--
--struct AtomTest {
-- const char* testname;
-- // If any test needs more than this many regexps or atoms, increase
-- // the size of the corresponding array.
-- const char* regexps[20];
-- const char* atoms[20];
--};
--
--AtomTest atom_tests[] = {
-- {
-- // This test checks to make sure empty patterns are allowed.
-- "CheckEmptyPattern",
-- {""},
-- {}
-- }, {
-- // This test checks that all atoms of length greater than min length
-- // are found, and no atoms that are of smaller length are found.
-- "AllAtomsGtMinLengthFound", {
-- "(abc123|def456|ghi789).*mnop[x-z]+",
-- "abc..yyy..zz",
-- "mnmnpp[a-z]+PPP"
-- }, {
-- "abc123",
-- "def456",
-- "ghi789",
-- "mnop",
-- "abc",
-- "yyy",
-- "mnmnpp",
-- "ppp"
-- }
-- }, {
-- // Test to make sure that any atoms that have another atom as a
-- // substring in an OR are removed; that is, only the shortest
-- // substring is kept.
-- "SubstrAtomRemovesSuperStrInOr", {
-- "(abc123|abc|ghi789|abc1234).*[x-z]+",
-- "abcd..yyy..yyyzzz",
-- "mnmnpp[a-z]+PPP"
-- }, {
-- "abc",
-- "ghi789",
-- "abcd",
-- "yyy",
-- "yyyzzz",
-- "mnmnpp",
-- "ppp"
-- }
-- }, {
-- // Test character class expansion.
-- "CharClassExpansion", {
-- "m[a-c][d-f]n.*[x-z]+",
-- "[x-y]bcde[ab]"
-- }, {
-- "madn", "maen", "mafn",
-- "mbdn", "mben", "mbfn",
-- "mcdn", "mcen", "mcfn",
-- "xbcdea", "xbcdeb",
-- "ybcdea", "ybcdeb"
-- }
-- }, {
-- // Test upper/lower of non-ASCII.
-- "UnicodeLower", {
-- "(?i)ΔδΠϖπΣςσ",
-- "ΛΜΝΟΠ",
-- "ψρστυ",
-- }, {
-- "δδπππσσσ",
-- "λμνοπ",
-- "ψρστυ",
-- },
-- },
--};
--
--void AddRegexpsAndCompile(const char* regexps[],
-- int n,
-- struct FilterTestVars* v) {
-- for (int i = 0; i < n; i++) {
-- int id;
-- v->f.Add(regexps[i], v->opts, &id);
-- }
-- v->f.Compile(&v->atoms);
--}
--
--bool CheckExpectedAtoms(const char* atoms[],
-- int n,
-- const char* testname,
-- struct FilterTestVars* v) {
-- vector<string> expected;
-- for (int i = 0; i < n; i++)
-- expected.push_back(atoms[i]);
--
-- bool pass = expected.size() == v->atoms.size();
--
-- sort(v->atoms.begin(), v->atoms.end());
-- sort(expected.begin(), expected.end());
-- for (int i = 0; pass && i < n; i++)
-- pass = pass && expected[i] == v->atoms[i];
--
-- if (!pass) {
-- LOG(WARNING) << "Failed " << testname;
-- LOG(WARNING) << "Expected #atoms = " << expected.size();
-- for (int i = 0; i < expected.size(); i++)
-- LOG(WARNING) << expected[i];
-- LOG(WARNING) << "Found #atoms = " << v->atoms.size();
-- for (int i = 0; i < v->atoms.size(); i++)
-- LOG(WARNING) << v->atoms[i];
-- }
--
-- return pass;
--}
--
--TEST(FilteredRE2Test, AtomTests) {
-- FLAGS_filtered_re2_min_atom_len = 3;
--
-- int nfail = 0;
-- for (int i = 0; i < arraysize(atom_tests); i++) {
-- FilterTestVars v;
-- AtomTest* t = &atom_tests[i];
-- int natom, nregexp;
-- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
-- if (t->regexps[nregexp] == NULL)
-- break;
-- for (natom = 0; natom < arraysize(t->atoms); natom++)
-- if (t->atoms[natom] == NULL)
-- break;
-- AddRegexpsAndCompile(t->regexps, nregexp, &v);
-- if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
-- nfail++;
-- }
-- EXPECT_EQ(0, nfail);
--}
--
--void FindAtomIndices(const vector<string> atoms,
-- const vector<string> matched_atoms,
-- vector<int>* atom_indices) {
-- atom_indices->clear();
-- for (int i = 0; i < matched_atoms.size(); i++) {
-- int j = 0;
-- for (; j < atoms.size(); j++) {
-- if (matched_atoms[i] == atoms[j]) {
-- atom_indices->push_back(j);
-- break;
-- }
-- EXPECT_LT(j, atoms.size());
-- }
-- }
--}
--
--TEST(FilteredRE2Test, MatchEmptyPattern) {
-- FLAGS_filtered_re2_min_atom_len = 3;
--
-- FilterTestVars v;
-- AtomTest* t = &atom_tests[0];
-- // We are using the regexps used in one of the atom tests
-- // for this test. Adding the EXPECT here to make sure
-- // the index we use for the test is for the correct test.
-- EXPECT_EQ("CheckEmptyPattern", string(t->testname));
-- int nregexp;
-- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
-- if (t->regexps[nregexp] == NULL)
-- break;
-- AddRegexpsAndCompile(t->regexps, nregexp, &v);
-- string text = "0123";
-- vector<int> atom_ids;
-- vector<int> matching_regexps;
-- EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
--}
--
--TEST(FilteredRE2Test, MatchTests) {
-- FLAGS_filtered_re2_min_atom_len = 3;
--
-- FilterTestVars v;
-- AtomTest* t = &atom_tests[2];
-- // We are using the regexps used in one of the atom tests
-- // for this test.
-- EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname));
-- int nregexp;
-- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
-- if (t->regexps[nregexp] == NULL)
-- break;
-- AddRegexpsAndCompile(t->regexps, nregexp, &v);
--
-- string text = "abc121212xyz";
-- // atoms = abc
-- vector<int> atom_ids;
-- vector<string> atoms;
-- atoms.push_back("abc");
-- FindAtomIndices(v.atoms, atoms, &atom_ids);
-- vector<int> matching_regexps;
-- v.f.AllMatches(text, atom_ids, &matching_regexps);
-- EXPECT_EQ(1, matching_regexps.size());
--
-- text = "abc12312yyyzzz";
-- atoms.clear();
-- atoms.push_back("abc");
-- atoms.push_back("yyy");
-- atoms.push_back("yyyzzz");
-- FindAtomIndices(v.atoms, atoms, &atom_ids);
-- v.f.AllMatches(text, atom_ids, &matching_regexps);
-- EXPECT_EQ(1, matching_regexps.size());
--
-- text = "abcd12yyy32yyyzzz";
-- atoms.clear();
-- atoms.push_back("abc");
-- atoms.push_back("abcd");
-- atoms.push_back("yyy");
-- atoms.push_back("yyyzzz");
-- FindAtomIndices(v.atoms, atoms, &atom_ids);
-- LOG(INFO) << "S: " << atom_ids.size();
-- for (int i = 0; i < atom_ids.size(); i++)
-- LOG(INFO) << "i: " << i << " : " << atom_ids[i];
-- v.f.AllMatches(text, atom_ids, &matching_regexps);
-- EXPECT_EQ(2, matching_regexps.size());
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/mimics_pcre_test.cc b/re2/re2/testing/mimics_pcre_test.cc
-deleted file mode 100644
-index f965092..0000000
---- a/re2/re2/testing/mimics_pcre_test.cc
-+++ /dev/null
-@@ -1,76 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/test.h"
--#include "re2/prog.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct PCRETest {
-- const char* regexp;
-- bool should_match;
--};
--
--static PCRETest tests[] = {
-- // Most things should behave exactly.
-- { "abc", true },
-- { "(a|b)c", true },
-- { "(a*|b)c", true },
-- { "(a|b*)c", true },
-- { "a(b|c)d", true },
-- { "a(()|())c", true },
-- { "ab*c", true },
-- { "ab+c", true },
-- { "a(b*|c*)d", true },
-- { "\\W", true },
-- { "\\W{1,2}", true },
-- { "\\d", true },
--
-- // Check that repeated empty strings do not.
-- { "(a*)*", false },
-- { "x(a*)*y", false },
-- { "(a*)+", false },
-- { "(a+)*", true },
-- { "(a+)+", true },
-- { "(a+)+", true },
--
-- // \v is the only character class that shouldn't.
-- { "\\b", true },
-- { "\\v", false },
-- { "\\d", true },
--
-- // The handling of ^ in multi-line mode is different, as is
-- // the handling of $ in single-line mode. (Both involve
-- // boundary cases if the string ends with \n.)
-- { "\\A", true },
-- { "\\z", true },
-- { "(?m)^", false },
-- { "(?m)$", true },
-- { "(?-m)^", true },
-- { "(?-m)$", false }, // In PCRE, == \Z
-- { "(?m)\\A", true },
-- { "(?m)\\z", true },
-- { "(?-m)\\A", true },
-- { "(?-m)\\z", true },
--};
--
--TEST(MimicsPCRE, SimpleTests) {
-- for (int i = 0; i < arraysize(tests); i++) {
-- const PCRETest& t = tests[i];
-- for (int j = 0; j < 2; j++) {
-- Regexp::ParseFlags flags = Regexp::LikePerl;
-- if (j == 0)
-- flags = flags | Regexp::Latin1;
-- Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
-- CHECK(re) << " " << t.regexp;
-- CHECK_EQ(t.should_match, re->MimicsPCRE())
-- << " " << t.regexp << " "
-- << (j==0 ? "latin1" : "utf");
-- re->Decref();
-- }
-- }
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/null_walker.cc b/re2/re2/testing/null_walker.cc
-deleted file mode 100644
-index 09b53cb..0000000
---- a/re2/re2/testing/null_walker.cc
-+++ /dev/null
-@@ -1,44 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/test.h"
--#include "re2/regexp.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--// Null walker. For benchmarking the walker itself.
--
--class NullWalker : public Regexp::Walker<bool> {
-- public:
-- NullWalker() { }
-- bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-- bool* child_args, int nchild_args);
--
-- bool ShortVisit(Regexp* re, bool a) {
-- // Should never be called: we use Walk not WalkExponential.
-- LOG(DFATAL) << "NullWalker::ShortVisit called";
-- return a;
-- }
--
-- private:
-- DISALLOW_EVIL_CONSTRUCTORS(NullWalker);
--};
--
--// Called after visiting re's children. child_args contains the return
--// value from each of the children's PostVisits (i.e., whether each child
--// can match an empty string). Returns whether this clause can match an
--// empty string.
--bool NullWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-- bool* child_args, int nchild_args) {
-- return false;
--}
--
--// Returns whether re can match an empty string.
--void Regexp::NullWalk() {
-- NullWalker w;
-- w.Walk(this, false);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/parse_test.cc b/re2/re2/testing/parse_test.cc
-deleted file mode 100644
-index f895316..0000000
---- a/re2/re2/testing/parse_test.cc
-+++ /dev/null
-@@ -1,376 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test parse.cc, dump.cc, and tostring.cc.
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct Test {
-- const char* regexp;
-- const char* parse;
--};
--
--static Test tests[] = {
-- // Base cases
-- { "a", "lit{a}" },
-- { "a.", "cat{lit{a}dot{}}" },
-- { "a.b", "cat{lit{a}dot{}lit{b}}" },
-- { "ab", "str{ab}" },
-- { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" },
-- { "abc", "str{abc}" },
-- { "a|^", "alt{lit{a}bol{}}" },
-- { "a|b", "cc{0x61-0x62}" },
-- { "(a)", "cap{lit{a}}" },
-- { "(a)|b", "alt{cap{lit{a}}lit{b}}" },
-- { "a*", "star{lit{a}}" },
-- { "a+", "plus{lit{a}}" },
-- { "a?", "que{lit{a}}" },
-- { "a{2}", "rep{2,2 lit{a}}" },
-- { "a{2,3}", "rep{2,3 lit{a}}" },
-- { "a{2,}", "rep{2,-1 lit{a}}" },
-- { "a*?", "nstar{lit{a}}" },
-- { "a+?", "nplus{lit{a}}" },
-- { "a??", "nque{lit{a}}" },
-- { "a{2}?", "nrep{2,2 lit{a}}" },
-- { "a{2,3}?", "nrep{2,3 lit{a}}" },
-- { "a{2,}?", "nrep{2,-1 lit{a}}" },
-- { "", "emp{}" },
-- { "|", "emp{}" }, // alt{emp{}emp{}} but got factored
-- { "|x|", "alt{emp{}lit{x}emp{}}" },
-- { ".", "dot{}" },
-- { "^", "bol{}" },
-- { "$", "eol{}" },
-- { "\\|", "lit{|}" },
-- { "\\(", "lit{(}" },
-- { "\\)", "lit{)}" },
-- { "\\*", "lit{*}" },
-- { "\\+", "lit{+}" },
-- { "\\?", "lit{?}" },
-- { "{", "lit{{}" },
-- { "}", "lit{}}" },
-- { "\\.", "lit{.}" },
-- { "\\^", "lit{^}" },
-- { "\\$", "lit{$}" },
-- { "\\\\", "lit{\\}" },
-- { "[ace]", "cc{0x61 0x63 0x65}" },
-- { "[abc]", "cc{0x61-0x63}" },
-- { "[a-z]", "cc{0x61-0x7a}" },
-- { "[a]", "lit{a}" },
-- { "\\-", "lit{-}" },
-- { "-", "lit{-}" },
-- { "\\_", "lit{_}" },
--
-- // Posix and Perl extensions
-- { "[[:lower:]]", "cc{0x61-0x7a}" },
-- { "[a-z]", "cc{0x61-0x7a}" },
-- { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
-- { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
-- { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
-- { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
-- { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
-- { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
-- { "\\d", "cc{0x30-0x39}" },
-- { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" },
-- { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" },
-- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" },
-- { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" },
-- { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" },
-- { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
-- { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
-- { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" },
-- { "\\C", "byte{}" },
--
-- // Unicode, negatives, and a double negative.
-- { "\\p{Braille}", "cc{0x2800-0x28ff}" },
-- { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
-- { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
-- { "\\P{^Braille}", "cc{0x2800-0x28ff}" },
--
-- // More interesting regular expressions.
-- { "a{,2}", "str{a{,2}}" },
-- { "\\.\\^\\$\\\\", "str{.^$\\}" },
-- { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" },
-- { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
-- { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8
-- { "a*{", "cat{star{lit{a}}lit{{}}" },
--
-- // Test precedences
-- { "(?:ab)*", "star{str{ab}}" },
-- { "(ab)*", "star{cap{str{ab}}}" },
-- { "ab|cd", "alt{str{ab}str{cd}}" },
-- { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
--
-- // Test flattening.
-- { "(?:a)", "lit{a}" },
-- { "(?:ab)(?:cd)", "str{abcd}" },
-- { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
-- { "a|.", "dot{}" },
-- { ".|a", "dot{}" },
--
-- // Test Perl quoted literals
-- { "\\Q+|*?{[\\E", "str{+|*?{[}" },
-- { "\\Q+\\E+", "plus{lit{+}}" },
-- { "\\Q\\\\E", "lit{\\}" },
-- { "\\Q\\\\\\E", "str{\\\\}" },
--
-- // Test Perl \A and \z
-- { "(?m)^", "bol{}" },
-- { "(?m)$", "eol{}" },
-- { "(?-m)^", "bot{}" },
-- { "(?-m)$", "eot{}" },
-- { "(?m)\\A", "bot{}" },
-- { "(?m)\\z", "eot{\\z}" },
-- { "(?-m)\\A", "bot{}" },
-- { "(?-m)\\z", "eot{\\z}" },
--
-- // Test named captures
-- { "(?P<name>a)", "cap{name:lit{a}}" },
--
-- // Case-folded literals
-- { "[Aa]", "litfold{a}" },
--
-- // Strings
-- { "abcde", "str{abcde}" },
-- { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
--};
--
--static Regexp::ParseFlags kTestFlags = Regexp::MatchNL |
-- Regexp::PerlX |
-- Regexp::PerlClasses |
-- Regexp::UnicodeGroups;
--
--bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
-- return Regexp::Equal(a, b);
--}
--
--void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
-- const string& title) {
-- Regexp** re = new Regexp*[ntests];
-- for (int i = 0; i < ntests; i++) {
-- RegexpStatus status;
-- re[i] = Regexp::Parse(tests[i].regexp, flags, &status);
-- CHECK(re[i] != NULL) << " " << tests[i].regexp << " "
-- << status.Text();
-- string s = re[i]->Dump();
-- EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp
-- << "\nparse: " << tests[i].parse << " s: " << s;
-- }
--
-- for (int i = 0; i < ntests; i++) {
-- for (int j = 0; j < ntests; j++) {
-- EXPECT_EQ(string(tests[i].parse) == tests[j].parse,
-- RegexpEqualTestingOnly(re[i], re[j]))
-- << "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
-- }
-- }
--
-- for (int i = 0; i < ntests; i++)
-- re[i]->Decref();
-- delete[] re;
--}
--
--// Test that regexps parse to expected structures.
--TEST(TestParse, SimpleRegexps) {
-- TestParse(tests, arraysize(tests), kTestFlags, "simple");
--}
--
--Test foldcase_tests[] = {
-- { "AbCdE", "strfold{abcde}" },
-- { "[Aa]", "litfold{a}" },
-- { "a", "litfold{a}" },
--
-- // 0x17F is an old English long s (looks like an f) and folds to s.
-- // 0x212A is the Kelvin symbol and folds to k.
-- { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...]
-- { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
-- { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
--};
--
--// Test that parsing with FoldCase works.
--TEST(TestParse, FoldCase) {
-- TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
--}
--
--Test literal_tests[] = {
-- { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" },
--};
--
--// Test that parsing with Literal works.
--TEST(TestParse, Literal) {
-- TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
--}
--
--Test matchnl_tests[] = {
-- { ".", "dot{}" },
-- { "\n", "lit{\n}" },
-- { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
-- { "[a\\n]", "cc{0xa 0x61}" },
--};
--
--// Test that parsing with MatchNL works.
--// (Also tested above during simple cases.)
--TEST(TestParse, MatchNL) {
-- TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
--}
--
--Test nomatchnl_tests[] = {
-- { ".", "cc{0-0x9 0xb-0x10ffff}" },
-- { "\n", "lit{\n}" },
-- { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" },
-- { "[a\\n]", "cc{0xa 0x61}" },
--};
--
--// Test that parsing without MatchNL works.
--TEST(TestParse, NoMatchNL) {
-- TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
--}
--
--Test prefix_tests[] = {
-- { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" },
-- { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" },
-- { "abc|abd|aef|bcx|bcy",
-- "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}"
-- "cat{str{bc}cc{0x78-0x79}}}" },
-- { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },
-- { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
-- { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
-- { "(?:xx|yy)c|(?:xx|yy)d",
-- "cat{alt{str{xx}str{yy}}cc{0x63-0x64}}" },
-- { "x{2}|x{2}[0-9]",
-- "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
-- { "x{2}y|x{2}[0-9]y",
-- "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" },
--};
--
--// Test that prefix factoring works.
--TEST(TestParse, Prefix) {
-- TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
--}
--
--// Invalid regular expressions
--const char* badtests[] = {
-- "(",
-- ")",
-- "(a",
-- "(a|b|",
-- "(a|b",
-- "[a-z",
-- "([a-z)",
-- "x{1001}",
-- "\xff", // Invalid UTF-8
-- "[\xff]",
-- "[\\\xff]",
-- "\\\xff",
-- "(?P<name>a",
-- "(?P<name>",
-- "(?P<name",
-- "(?P<x y>a)",
-- "(?P<>a)",
-- "[a-Z]",
-- "(?i)[a-Z]",
-- "a{100000}",
-- "a{100000,}",
--};
--
--// Valid in Perl, bad in POSIX
--const char* only_perl[] = {
-- "[a-b-c]",
-- "\\Qabc\\E",
-- "\\Q*+?{[\\E",
-- "\\Q\\\\E",
-- "\\Q\\\\\\E",
-- "\\Q\\\\\\\\E",
-- "\\Q\\\\\\\\\\E",
-- "(?:a)",
-- "(?P<name>a)",
--};
--
--// Valid in POSIX, bad in Perl.
--const char* only_posix[] = {
-- "a++",
-- "a**",
-- "a?*",
-- "a+*",
-- "a{1}*",
--};
--
--// Test that parser rejects bad regexps.
--TEST(TestParse, InvalidRegexps) {
-- for (int i = 0; i < arraysize(badtests); i++) {
-- CHECK(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL)
-- << " " << badtests[i];
-- CHECK(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL)
-- << " " << badtests[i];
-- }
-- for (int i = 0; i < arraysize(only_posix); i++) {
-- CHECK(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL)
-- << " " << only_posix[i];
-- Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL);
-- CHECK(re) << " " << only_posix[i];
-- re->Decref();
-- }
-- for (int i = 0; i < arraysize(only_perl); i++) {
-- CHECK(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL)
-- << " " << only_perl[i];
-- Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL);
-- CHECK(re) << " " << only_perl[i];
-- re->Decref();
-- }
--}
--
--// Test that ToString produces original regexp or equivalent one.
--TEST(TestToString, EquivalentParse) {
-- for (int i = 0; i < arraysize(tests); i++) {
-- RegexpStatus status;
-- Regexp* re = Regexp::Parse(tests[i].regexp, kTestFlags, &status);
-- CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text();
-- string s = re->Dump();
-- EXPECT_EQ(string(tests[i].parse), s);
-- string t = re->ToString();
-- if (t != tests[i].regexp) {
-- // If ToString didn't return the original regexp,
-- // it must have found one with fewer parens.
-- // Unfortunately we can't check the length here, because
-- // ToString produces "\\{" for a literal brace,
-- // but "{" is a shorter equivalent.
-- // CHECK_LT(t.size(), strlen(tests[i].regexp))
-- // << " t=" << t << " regexp=" << tests[i].regexp;
--
-- // Test that if we parse the new regexp we get the same structure.
-- Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
-- CHECK(nre != NULL) << " reparse " << t << " " << status.Text();
-- string ss = nre->Dump();
-- string tt = nre->ToString();
-- if (s != ss || t != tt)
-- LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
-- EXPECT_EQ(s, ss);
-- EXPECT_EQ(t, tt);
-- nre->Decref();
-- }
-- re->Decref();
-- }
--}
--
--// Test that capture error args are correct.
--TEST(NamedCaptures, ErrorArgs) {
-- RegexpStatus status;
-- Regexp* re;
--
-- re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status);
-- EXPECT_TRUE(re == NULL);
-- EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
-- EXPECT_EQ(status.error_arg(), "(?P<name");
--
-- re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status);
-- EXPECT_TRUE(re == NULL);
-- EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
-- EXPECT_EQ(status.error_arg(), "(?P<space bar>");
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/possible_match_test.cc b/re2/re2/testing/possible_match_test.cc
-deleted file mode 100644
-index 7c2400e..0000000
---- a/re2/re2/testing/possible_match_test.cc
-+++ /dev/null
-@@ -1,240 +0,0 @@
--// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <vector>
--#include "util/test.h"
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--#include "re2/testing/regexp_generator.h"
--#include "re2/testing/string_generator.h"
--
--namespace re2 {
--
--// Test that C++ strings are compared as uint8s, not int8s.
--// PossibleMatchRange doesn't depend on this, but callers probably will.
--TEST(CplusplusStrings, EightBit) {
-- string s = "\x70";
-- string t = "\xA0";
-- EXPECT_LT(s, t);
--}
--
--struct PrefixTest {
-- const char* regexp;
-- int maxlen;
-- const char* min;
-- const char* max;
--};
--
--static PrefixTest tests[] = {
-- { "", 10, "", "", },
-- { "Abcdef", 10, "Abcdef", "Abcdef" },
-- { "abc(def|ghi)", 10, "abcdef", "abcghi" },
-- { "a+hello", 10, "aa", "ahello" },
-- { "a*hello", 10, "a", "hello" },
-- { "def|abc", 10, "abc", "def" },
-- { "a(b)(c)[d]", 10, "abcd", "abcd" },
-- { "ab(cab|cat)", 10, "abcab", "abcat" },
-- { "ab(cab|ca)x", 10, "abcabx", "abcax" },
-- { "(ab|x)(c|de)", 10, "abc", "xde" },
-- { "(ab|x)?(c|z)?", 10, "", "z" },
-- { "[^\\s\\S]", 10, "", "" },
-- { "(abc)+", 5, "abc", "abcac" },
-- { "(abc)+", 2, "ab", "ac" },
-- { "(abc)+", 1, "a", "b" },
-- { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
-- { "a*", 10, "", "ab" },
--
-- { "(?i)Abcdef", 10, "ABCDEF", "abcdef" },
-- { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" },
-- { "(?i)a+hello", 10, "AA", "ahello" },
-- { "(?i)a*hello", 10, "A", "hello" },
-- { "(?i)def|abc", 10, "ABC", "def" },
-- { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" },
-- { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" },
-- { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" },
-- { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" },
-- { "(?i)(ab|x)?(c|z)?", 10, "", "z" },
-- { "(?i)[^\\s\\S]", 10, "", "" },
-- { "(?i)(abc)+", 5, "ABC", "abcac" },
-- { "(?i)(abc)+", 2, "AB", "ac" },
-- { "(?i)(abc)+", 1, "A", "b" },
-- { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
-- { "(?i)a*", 10, "", "ab" },
-- { "(?i)A*", 10, "", "ab" },
--
-- { "\\AAbcdef", 10, "Abcdef", "Abcdef" },
-- { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" },
-- { "\\Aa+hello", 10, "aa", "ahello" },
-- { "\\Aa*hello", 10, "a", "hello" },
-- { "\\Adef|abc", 10, "abc", "def" },
-- { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" },
-- { "\\Aab(cab|cat)", 10, "abcab", "abcat" },
-- { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" },
-- { "\\A(ab|x)(c|de)", 10, "abc", "xde" },
-- { "\\A(ab|x)?(c|z)?", 10, "", "z" },
-- { "\\A[^\\s\\S]", 10, "", "" },
-- { "\\A(abc)+", 5, "abc", "abcac" },
-- { "\\A(abc)+", 2, "ab", "ac" },
-- { "\\A(abc)+", 1, "a", "b" },
-- { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
-- { "\\Aa*", 10, "", "ab" },
--
-- { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" },
-- { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" },
-- { "(?i)\\Aa+hello", 10, "AA", "ahello" },
-- { "(?i)\\Aa*hello", 10, "A", "hello" },
-- { "(?i)\\Adef|abc", 10, "ABC", "def" },
-- { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" },
-- { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" },
-- { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" },
-- { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" },
-- { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" },
-- { "(?i)\\A[^\\s\\S]", 10, "", "" },
-- { "(?i)\\A(abc)+", 5, "ABC", "abcac" },
-- { "(?i)\\A(abc)+", 2, "AB", "ac" },
-- { "(?i)\\A(abc)+", 1, "A", "b" },
-- { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
-- { "(?i)\\Aa*", 10, "", "ab" },
-- { "(?i)\\AA*", 10, "", "ab" },
--};
--
--TEST(PossibleMatchRange, HandWritten) {
-- for (int i = 0; i < arraysize(tests); i++) {
-- for (int j = 0; j < 2; j++) {
-- const PrefixTest& t = tests[i];
-- string min, max;
-- if (j == 0) {
-- LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
-- Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
-- << " " << t.regexp;
-- delete prog;
-- re->Decref();
-- } else {
-- CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
-- }
-- EXPECT_EQ(t.min, min) << t.regexp;
-- EXPECT_EQ(t.max, max) << t.regexp;
-- }
-- }
--}
--
--// Test cases where PossibleMatchRange should return false.
--TEST(PossibleMatchRange, Failures) {
-- string min, max;
--
-- // Fails because no room to write max.
-- EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
--
-- // Fails because there is no max -- any non-empty string matches
-- // or begins a match. Have to use Latin-1 input, because there
-- // are no valid UTF-8 strings beginning with byte 0xFF.
-- EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-- EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-- EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-- EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-- EXPECT_FALSE(RE2(".*", RE2::Latin1).
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-- EXPECT_FALSE(RE2("\\C*").
-- PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
--
-- // Fails because it's a malformed regexp.
-- EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
-- << "min=" << CEscape(min) << ", max=" << CEscape(max);
--}
--
--// Exhaustive test: generate all regexps within parameters,
--// then generate all strings of a given length over a given alphabet,
--// then check that the prefix information agrees with whether
--// the regexp matches each of the strings.
--class PossibleMatchTester : public RegexpGenerator {
-- public:
-- PossibleMatchTester(int maxatoms,
-- int maxops,
-- const vector<string>& alphabet,
-- const vector<string>& ops,
-- int maxstrlen,
-- const vector<string>& stralphabet)
-- : RegexpGenerator(maxatoms, maxops, alphabet, ops),
-- strgen_(maxstrlen, stralphabet),
-- regexps_(0), tests_(0) { }
--
-- int regexps() { return regexps_; }
-- int tests() { return tests_; }
--
-- // Needed for RegexpGenerator interface.
-- void HandleRegexp(const string& regexp);
--
-- private:
-- StringGenerator strgen_;
--
-- int regexps_; // Number of HandleRegexp calls
-- int tests_; // Number of regexp tests.
--
-- DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester);
--};
--
--// Processes a single generated regexp.
--// Checks that all accepted strings agree with the prefix range.
--void PossibleMatchTester::HandleRegexp(const string& regexp) {
-- regexps_++;
--
-- VLOG(3) << CEscape(regexp);
--
-- RE2 re(regexp, RE2::Latin1);
-- CHECK_EQ(re.error(), "");
--
-- string min, max;
-- if(!re.PossibleMatchRange(&min, &max, 10)) {
-- // There's no good max for "\\C*". Can't use strcmp
-- // because sometimes it gets embedded in more
-- // complicated expressions.
-- if(strstr(regexp.c_str(), "\\C*"))
-- return;
-- LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
-- }
--
-- strgen_.Reset();
-- while (strgen_.HasNext()) {
-- const StringPiece& s = strgen_.Next();
-- tests_++;
-- if (!RE2::FullMatch(s, re))
-- continue;
-- CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;
-- CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
-- }
--}
--
--TEST(PossibleMatchRange, Exhaustive) {
-- int natom = 3;
-- int noperator = 3;
-- int stringlen = 5;
-- if (DEBUG_MODE) {
-- natom = 2;
-- noperator = 3;
-- stringlen = 3;
-- }
-- PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
-- RegexpGenerator::EgrepOps(),
-- stringlen, Explode("ab4"));
-- t.Generate();
-- LOG(INFO) << t.regexps() << " regexps, "
-- << t.tests() << " tests";
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/random_test.cc b/re2/re2/testing/random_test.cc
-deleted file mode 100644
-index 91d2b32..0000000
---- a/re2/re2/testing/random_test.cc
-+++ /dev/null
-@@ -1,95 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Random testing of regular expression matching.
--
--#include <stdio.h>
--#include "util/test.h"
--#include "re2/testing/exhaustive_tester.h"
--
--DEFINE_int32(regexpseed, 404, "Random regexp seed.");
--DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
--DEFINE_int32(stringseed, 200, "Random string seed.");
--DEFINE_int32(stringcount, 100, "How many random strings to generate.");
--
--namespace re2 {
--
--// Runs a random test on the given parameters.
--// (Always uses the same random seeds for reproducibility.
--// Can give different seeds on command line.)
--static void RandomTest(int maxatoms, int maxops,
-- const vector<string>& alphabet,
-- const vector<string>& ops,
-- int maxstrlen, const vector<string>& stralphabet,
-- const string& wrapper) {
-- // Limit to smaller test cases in debug mode,
-- // because everything is so much slower.
-- if (DEBUG_MODE) {
-- maxatoms--;
-- maxops--;
-- maxstrlen /= 2;
-- }
--
-- ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
-- maxstrlen, stralphabet, wrapper, "");
-- t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
-- t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
-- printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
-- t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
-- EXPECT_EQ(0, t.failures());
--}
--
--// Tests random small regexps involving literals and egrep operators.
--TEST(Random, SmallEgrepLiterals) {
-- RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),
-- 15, Explode("abc"),
-- "");
--}
--
--// Tests random bigger regexps involving literals and egrep operators.
--TEST(Random, BigEgrepLiterals) {
-- RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),
-- 15, Explode("abc"),
-- "");
--}
--
--// Tests random small regexps involving literals, capturing parens,
--// and egrep operators.
--TEST(Random, SmallEgrepCaptures) {
-- RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
-- 15, Explode("abc"),
-- "");
--}
--
--// Tests random bigger regexps involving literals, capturing parens,
--// and egrep operators.
--TEST(Random, BigEgrepCaptures) {
-- RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
-- 15, Explode("abc"),
-- "");
--}
--
--// Tests random large complicated expressions, using all the possible
--// operators, some literals, some parenthesized literals, and predefined
--// character classes like \d. (Adding larger character classes would
--// make for too many possibilities.)
--TEST(Random, Complicated) {
-- vector<string> ops = Split(" ",
-- "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
-- "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
-- "%s{2} %s{2,} %s{3,4} %s{4,5}");
--
-- // Use (?:\b) and (?:\B) instead of \b and \B,
-- // because PCRE rejects \b* but accepts (?:\b)*.
-- // Ditto ^ and $.
-- vector<string> atoms = Split(" ",
-- ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
-- "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
-- "a (a) b c - \\\\");
-- vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
-- RandomTest(10, 10, atoms, ops, 20, alphabet, "");
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/re2_arg_test.cc b/re2/re2/testing/re2_arg_test.cc
-deleted file mode 100644
-index 0a77d95..0000000
---- a/re2/re2/testing/re2_arg_test.cc
-+++ /dev/null
-@@ -1,132 +0,0 @@
--// Copyright 2005 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// This tests to make sure numbers are parsed from strings
--// correctly.
--// Todo: Expand the test to validate strings parsed to the other types
--// supported by RE2::Arg class
--
--#include "util/test.h"
--#include "re2/re2.h"
--
--namespace re2 {
--
--struct SuccessTable {
-- const char * value_string;
-- int64 value;
-- bool success[6];
--};
--
--// Test boundary cases for different integral sizes.
--// Specifically I want to make sure that values outside the boundries
--// of an integral type will fail and that negative numbers will fail
--// for unsigned types. The following table contains the boundaries for
--// the various integral types and has entries for whether or not each
--// type can contain the given value.
--const SuccessTable kSuccessTable[] = {
--// string integer value short ushort int uint int64 uint64
--// 0 to 2^7-1
--{ "0", 0, { true, true, true, true, true, true }},
--{ "127", 127, { true, true, true, true, true, true }},
--
--// -1 to -2^7
--{ "-1", -1, { true, false, true, false, true, false }},
--{ "-128", -128, { true, false, true, false, true, false }},
--
--// 2^7 to 2^8-1
--{ "128", 128, { true, true, true, true, true, true }},
--{ "255", 255, { true, true, true, true, true, true }},
--
--// 2^8 to 2^15-1
--{ "256", 256, { true, true, true, true, true, true }},
--{ "32767", 32767, { true, true, true, true, true, true }},
--
--// -2^7-1 to -2^15
--{ "-129", -129, { true, false, true, false, true, false }},
--{ "-32768", -32768, { true, false, true, false, true, false }},
--
--// 2^15 to 2^16-1
--{ "32768", 32768, { false, true, true, true, true, true }},
--{ "65535", 65535, { false, true, true, true, true, true }},
--
--// 2^16 to 2^31-1
--{ "65536", 65536, { false, false, true, true, true, true }},
--{ "2147483647", 2147483647, { false, false, true, true, true, true }},
--
--// -2^15-1 to -2^31
--{ "-32769", -32769, { false, false, true, false, true, false }},
--{ "-2147483648",
-- 0xFFFFFFFF80000000LL, { false, false, true, false, true, false }},
--
--// 2^31 to 2^32-1
--{ "2147483648", 2147483648U, { false, false, false, true, true, true }},
--{ "4294967295", 4294967295U, { false, false, false, true, true, true }},
--
--// 2^32 to 2^63-1
--{ "4294967296", 4294967296LL, { false, false, false, false, true, true }},
--{ "9223372036854775807",
-- 9223372036854775807LL, { false, false, false, false, true, true }},
--
--// -2^31-1 to -2^63
--{ "-2147483649", -2147483649LL, { false, false, false, false, true, false }},
--{ "-9223372036854775808",
-- 0x8000000000000000LL, { false, false, false, false, true, false }},
--
--// 2^63 to 2^64-1
--{ "9223372036854775808",
-- 9223372036854775808ULL, { false, false, false, false, false, true }},
--{ "18446744073709551615",
-- 18446744073709551615ULL, { false, false, false, false, false, true }},
--
--// >= 2^64
--{ "18446744073709551616", 0, { false, false, false, false, false, false }},
--};
--
--const int kNumStrings = ARRAYSIZE(kSuccessTable);
--
--// It's ugly to use a macro, but we apparently can't use the ASSERT_TRUE_M
--// macro outside of a TEST block and this seems to be the only way to
--// avoid code duplication. I can also pull off a couple nice tricks
--// using concatenation for the type I'm checking against.
--#define PARSE_FOR_TYPE(type, column) { \
-- type r; \
-- for ( int i = 0; i < kNumStrings; ++i ) { \
-- RE2::Arg arg(&r); \
-- const char* const p = kSuccessTable[i].value_string; \
-- bool retval = arg.Parse(p, strlen(p)); \
-- bool success = kSuccessTable[i].success[column]; \
-- ASSERT_TRUE_M(retval == success, \
-- StringPrintf("Parsing '%s' for type " #type " should return %d", \
-- p, success).c_str()); \
-- if ( success ) { \
-- ASSERT_EQUALS(r, kSuccessTable[i].value); \
-- } \
-- } \
--}
--
--TEST(REArgTest, Int16Test) {
-- PARSE_FOR_TYPE(int16, 0);
--}
--
--TEST(REArgTest, Uint16Test) {
-- PARSE_FOR_TYPE(uint16, 1);
--}
--
--TEST(REArgTest, IntTest) {
-- PARSE_FOR_TYPE(int, 2);
--}
--
--TEST(REArgTest, UInt32Test) {
-- PARSE_FOR_TYPE(uint32, 3);
--}
--
--TEST(REArgTest, Iint64Test) {
-- PARSE_FOR_TYPE(int64, 4);
--}
--
--TEST(REArgTest, Uint64Test) {
-- PARSE_FOR_TYPE(uint64, 5);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/re2_test.cc b/re2/re2/testing/re2_test.cc
-deleted file mode 100644
-index 031c8b8..0000000
---- a/re2/re2/testing/re2_test.cc
-+++ /dev/null
-@@ -1,1363 +0,0 @@
--// -*- coding: utf-8 -*-
--// Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// TODO: Test extractions for PartialMatch/Consume
--
--#include <sys/types.h>
--#include <sys/mman.h>
--#include <sys/stat.h>
--#include <errno.h>
--#include <vector>
--#include "util/test.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--
--DECLARE_bool(logtostderr);
--
--namespace re2 {
--
--TEST(RE2, HexTests) {
--
-- VLOG(1) << "hex tests";
--
--#define CHECK_HEX(type, value) \
-- do { \
-- type v; \
-- CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
-- CHECK_EQ(v, 0x ## value); \
-- CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
-- CHECK_EQ(v, 0x ## value); \
-- } while(0)
--
-- CHECK_HEX(short, 2bad);
-- CHECK_HEX(unsigned short, 2badU);
-- CHECK_HEX(int, dead);
-- CHECK_HEX(unsigned int, deadU);
-- CHECK_HEX(long, 7eadbeefL);
-- CHECK_HEX(unsigned long, deadbeefUL);
-- CHECK_HEX(long long, 12345678deadbeefLL);
-- CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
--
--#undef CHECK_HEX
--}
--
--TEST(RE2, OctalTests) {
-- VLOG(1) << "octal tests";
--
--#define CHECK_OCTAL(type, value) \
-- do { \
-- type v; \
-- CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
-- CHECK_EQ(v, 0 ## value); \
-- CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
-- CHECK_EQ(v, 0 ## value); \
-- } while(0)
--
-- CHECK_OCTAL(short, 77777);
-- CHECK_OCTAL(unsigned short, 177777U);
-- CHECK_OCTAL(int, 17777777777);
-- CHECK_OCTAL(unsigned int, 37777777777U);
-- CHECK_OCTAL(long, 17777777777L);
-- CHECK_OCTAL(unsigned long, 37777777777UL);
-- CHECK_OCTAL(long long, 777777777777777777777LL);
-- CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
--
--#undef CHECK_OCTAL
--}
--
--TEST(RE2, DecimalTests) {
-- VLOG(1) << "decimal tests";
--
--#define CHECK_DECIMAL(type, value) \
-- do { \
-- type v; \
-- CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
-- CHECK_EQ(v, value); \
-- CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
-- CHECK_EQ(v, value); \
-- } while(0)
--
-- CHECK_DECIMAL(short, -1);
-- CHECK_DECIMAL(unsigned short, 9999);
-- CHECK_DECIMAL(int, -1000);
-- CHECK_DECIMAL(unsigned int, 12345U);
-- CHECK_DECIMAL(long, -10000000L);
-- CHECK_DECIMAL(unsigned long, 3083324652U);
-- CHECK_DECIMAL(long long, -100000000000000LL);
-- CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
--
--#undef CHECK_DECIMAL
--}
--
--TEST(RE2, Replace) {
-- VLOG(1) << "TestReplace";
--
-- struct ReplaceTest {
-- const char *regexp;
-- const char *rewrite;
-- const char *original;
-- const char *single;
-- const char *global;
-- int greplace_count;
-- };
-- static const ReplaceTest tests[] = {
-- { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
-- "\\2\\1ay",
-- "the quick brown fox jumps over the lazy dogs.",
-- "ethay quick brown fox jumps over the lazy dogs.",
-- "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
-- 9 },
-- { "\\w+",
-- "\\0-NOSPAM",
-- "abcd.efghi at google.com",
-- "abcd-NOSPAM.efghi at google.com",
-- "abcd-NOSPAM.efghi-NOSPAM at google-NOSPAM.com-NOSPAM",
-- 4 },
-- { "^",
-- "(START)",
-- "foo",
-- "(START)foo",
-- "(START)foo",
-- 1 },
-- { "^",
-- "(START)",
-- "",
-- "(START)",
-- "(START)",
-- 1 },
-- { "$",
-- "(END)",
-- "",
-- "(END)",
-- "(END)",
-- 1 },
-- { "b",
-- "bb",
-- "ababababab",
-- "abbabababab",
-- "abbabbabbabbabb",
-- 5 },
-- { "b",
-- "bb",
-- "bbbbbb",
-- "bbbbbbb",
-- "bbbbbbbbbbbb",
-- 6 },
-- { "b+",
-- "bb",
-- "bbbbbb",
-- "bb",
-- "bb",
-- 1 },
-- { "b*",
-- "bb",
-- "bbbbbb",
-- "bb",
-- "bb",
-- 1 },
-- { "b*",
-- "bb",
-- "aaaaa",
-- "bbaaaaa",
-- "bbabbabbabbabbabb",
-- 6 },
-- // Check newline handling
-- { "a.*a",
-- "(\\0)",
-- "aba\naba",
-- "(aba)\naba",
-- "(aba)\n(aba)",
-- 2 },
-- { "", NULL, NULL, NULL, NULL, 0 }
-- };
--
-- for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
-- VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
-- string one(t->original);
-- CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
-- CHECK_EQ(one, t->single);
-- string all(t->original);
-- CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
-- << "Got: " << all;
-- CHECK_EQ(all, t->global);
-- }
--}
--
--static void TestCheckRewriteString(const char* regexp, const char* rewrite,
-- bool expect_ok) {
-- string error;
-- RE2 exp(regexp);
-- bool actual_ok = exp.CheckRewriteString(rewrite, &error);
-- EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
--}
--
--TEST(CheckRewriteString, all) {
-- TestCheckRewriteString("abc", "foo", true);
-- TestCheckRewriteString("abc", "foo\\", false);
-- TestCheckRewriteString("abc", "foo\\0bar", true);
--
-- TestCheckRewriteString("a(b)c", "foo", true);
-- TestCheckRewriteString("a(b)c", "foo\\0bar", true);
-- TestCheckRewriteString("a(b)c", "foo\\1bar", true);
-- TestCheckRewriteString("a(b)c", "foo\\2bar", false);
-- TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
--
-- TestCheckRewriteString("a(b)(c)", "foo\\12", true);
-- TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
-- TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
--}
--
--TEST(RE2, Extract) {
-- VLOG(1) << "TestExtract";
--
-- string s;
--
-- CHECK(RE2::Extract("boris at kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
-- CHECK_EQ(s, "kremvax!boris");
--
-- CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
-- CHECK_EQ(s, "'foo'");
-- // check that false match doesn't overwrite
-- CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
-- CHECK_EQ(s, "'foo'");
--}
--
--TEST(RE2, Consume) {
-- VLOG(1) << "TestConsume";
--
-- RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
-- string word;
--
-- string s(" aaa b!@#$@#$cccc");
-- StringPiece input(s);
--
-- CHECK(RE2::Consume(&input, r, &word));
-- CHECK_EQ(word, "aaa") << " input: " << input;
-- CHECK(RE2::Consume(&input, r, &word));
-- CHECK_EQ(word, "b") << " input: " << input;
-- CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
--}
--
--TEST(RE2, ConsumeN) {
-- const string s(" one two three 4");
-- StringPiece input(s);
--
-- RE2::Arg argv[2];
-- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
--
-- // 0 arg
-- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
--
-- // 1 arg
-- string word;
-- argv[0] = &word;
-- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
-- EXPECT_EQ("two", word);
--
-- // Multi-args
-- int n;
-- argv[1] = &n;
-- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
-- EXPECT_EQ("three", word);
-- EXPECT_EQ(4, n);
--}
--
--TEST(RE2, FindAndConsume) {
-- VLOG(1) << "TestFindAndConsume";
--
-- RE2 r("(\\w+)"); // matches a word
-- string word;
--
-- string s(" aaa b!@#$@#$cccc");
-- StringPiece input(s);
--
-- CHECK(RE2::FindAndConsume(&input, r, &word));
-- CHECK_EQ(word, "aaa");
-- CHECK(RE2::FindAndConsume(&input, r, &word));
-- CHECK_EQ(word, "b");
-- CHECK(RE2::FindAndConsume(&input, r, &word));
-- CHECK_EQ(word, "cccc");
-- CHECK(! RE2::FindAndConsume(&input, r, &word));
--
-- // Check that FindAndConsume works without any submatches.
-- // Earlier version used uninitialized data for
-- // length to consume.
-- input = "aaa";
-- CHECK(RE2::FindAndConsume(&input, "aaa"));
-- CHECK_EQ(input, "");
--}
--
--TEST(RE2, FindAndConsumeN) {
-- const string s(" one two three 4");
-- StringPiece input(s);
--
-- RE2::Arg argv[2];
-- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
--
-- // 0 arg
-- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
--
-- // 1 arg
-- string word;
-- argv[0] = &word;
-- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
-- EXPECT_EQ("two", word);
--
-- // Multi-args
-- int n;
-- argv[1] = &n;
-- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
-- EXPECT_EQ("three", word);
-- EXPECT_EQ(4, n);
--}
--
--TEST(RE2, MatchNumberPeculiarity) {
-- VLOG(1) << "TestMatchNumberPeculiarity";
--
-- RE2 r("(foo)|(bar)|(baz)");
-- string word1;
-- string word2;
-- string word3;
--
-- CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
-- CHECK_EQ(word1, "foo");
-- CHECK_EQ(word2, "");
-- CHECK_EQ(word3, "");
-- CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
-- CHECK_EQ(word1, "");
-- CHECK_EQ(word2, "bar");
-- CHECK_EQ(word3, "");
-- CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
-- CHECK_EQ(word1, "");
-- CHECK_EQ(word2, "");
-- CHECK_EQ(word3, "baz");
-- CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
--
-- string a;
-- CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
-- CHECK_EQ(a, "");
--}
--
--TEST(RE2, Match) {
-- RE2 re("((\\w+):([0-9]+))"); // extracts host and port
-- StringPiece group[4];
--
-- // No match.
-- StringPiece s = "zyzzyva";
-- CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
-- group, arraysize(group)));
--
-- // Matches and extracts.
-- s = "a chrisr:9000 here";
-- CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
-- group, arraysize(group)));
-- CHECK_EQ(group[0], "chrisr:9000");
-- CHECK_EQ(group[1], "chrisr:9000");
-- CHECK_EQ(group[2], "chrisr");
-- CHECK_EQ(group[3], "9000");
--
-- string all, host;
-- int port;
-- CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
-- CHECK_EQ(all, "chrisr:9000");
-- CHECK_EQ(host, "chrisr");
-- CHECK_EQ(port, 9000);
--}
--
--static void TestRecursion(int size, const char *pattern) {
-- // Fill up a string repeating the pattern given
-- string domain;
-- domain.resize(size);
-- int patlen = strlen(pattern);
-- for (int i = 0; i < size; ++i) {
-- domain[i] = pattern[i % patlen];
-- }
-- // Just make sure it doesn't crash due to too much recursion.
-- RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
-- RE2::FullMatch(domain, re);
--}
--
--// A meta-quoted string, interpreted as a pattern, should always match
--// the original unquoted string.
--static void TestQuoteMeta(string unquoted,
-- const RE2::Options& options = RE2::DefaultOptions) {
-- string quoted = RE2::QuoteMeta(unquoted);
-- RE2 re(quoted, options);
-- EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
-- "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
--}
--
--// A meta-quoted string, interpreted as a pattern, should always match
--// the original unquoted string.
--static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
-- const RE2::Options& options = RE2::DefaultOptions) {
-- string quoted = RE2::QuoteMeta(unquoted);
-- RE2 re(quoted, options);
-- EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
-- "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
--}
--
--// Tests that quoted meta characters match their original strings,
--// and that a few things that shouldn't match indeed do not.
--TEST(QuoteMeta, Simple) {
-- TestQuoteMeta("foo");
-- TestQuoteMeta("foo.bar");
-- TestQuoteMeta("foo\\.bar");
-- TestQuoteMeta("[1-9]");
-- TestQuoteMeta("1.5-2.0?");
-- TestQuoteMeta("\\d");
-- TestQuoteMeta("Who doesn't like ice cream?");
-- TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
-- TestQuoteMeta("((?!)xxx).*yyy");
-- TestQuoteMeta("([");
--}
--TEST(QuoteMeta, SimpleNegative) {
-- NegativeTestQuoteMeta("foo", "bar");
-- NegativeTestQuoteMeta("...", "bar");
-- NegativeTestQuoteMeta("\\.", ".");
-- NegativeTestQuoteMeta("\\.", "..");
-- NegativeTestQuoteMeta("(a)", "a");
-- NegativeTestQuoteMeta("(a|b)", "a");
-- NegativeTestQuoteMeta("(a|b)", "(a)");
-- NegativeTestQuoteMeta("(a|b)", "a|b");
-- NegativeTestQuoteMeta("[0-9]", "0");
-- NegativeTestQuoteMeta("[0-9]", "0-9");
-- NegativeTestQuoteMeta("[0-9]", "[9]");
-- NegativeTestQuoteMeta("((?!)xxx)", "xxx");
--}
--
--TEST(QuoteMeta, Latin1) {
-- TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
--}
--
--TEST(QuoteMeta, UTF8) {
-- TestQuoteMeta("Plácido Domingo");
-- TestQuoteMeta("xyz"); // No fancy utf8.
-- TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
-- TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
-- TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
-- TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
-- TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
-- // still work.
-- NegativeTestQuoteMeta("27\xc2\xb0",
-- "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
--}
--
--TEST(QuoteMeta, HasNull) {
-- string has_null;
--
-- // string with one null character
-- has_null += '\0';
-- TestQuoteMeta(has_null);
-- NegativeTestQuoteMeta(has_null, "");
--
-- // Don't want null-followed-by-'1' to be interpreted as '\01'.
-- has_null += '1';
-- TestQuoteMeta(has_null);
-- NegativeTestQuoteMeta(has_null, "\1");
--}
--
--TEST(ProgramSize, BigProgram) {
-- RE2 re_simple("simple regexp");
-- RE2 re_medium("medium.*regexp");
-- RE2 re_complex("hard.{1,128}regexp");
--
-- CHECK_GT(re_simple.ProgramSize(), 0);
-- CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
-- CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
--}
--
--// Issue 956519: handling empty character sets was
--// causing NULL dereference. This tests a few empty character sets.
--// (The way to get an empty character set is to negate a full one.)
--TEST(EmptyCharset, Fuzz) {
-- static const char *empties[] = {
-- "[^\\S\\s]",
-- "[^\\S[:space:]]",
-- "[^\\D\\d]",
-- "[^\\D[:digit:]]"
-- };
-- for (int i = 0; i < arraysize(empties); i++)
-- CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
--}
--
--// Test that named groups work correctly.
--TEST(Capture, NamedGroups) {
-- {
-- RE2 re("(hello world)");
-- CHECK_EQ(re.NumberOfCapturingGroups(), 1);
-- const map<string, int>& m = re.NamedCapturingGroups();
-- CHECK_EQ(m.size(), 0);
-- }
--
-- {
-- RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
-- CHECK_EQ(re.NumberOfCapturingGroups(), 6);
-- const map<string, int>& m = re.NamedCapturingGroups();
-- CHECK_EQ(m.size(), 4);
-- CHECK_EQ(m.find("A")->second, 1);
-- CHECK_EQ(m.find("B")->second, 2);
-- CHECK_EQ(m.find("C")->second, 3);
-- CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
-- }
--}
--
--TEST(RE2, FullMatchWithNoArgs) {
-- CHECK(RE2::FullMatch("h", "h"));
-- CHECK(RE2::FullMatch("hello", "hello"));
-- CHECK(RE2::FullMatch("hello", "h.*o"));
-- CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
-- CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
--}
--
--TEST(RE2, PartialMatch) {
-- CHECK(RE2::PartialMatch("x", "x"));
-- CHECK(RE2::PartialMatch("hello", "h.*o"));
-- CHECK(RE2::PartialMatch("othello", "h.*o"));
-- CHECK(RE2::PartialMatch("hello!", "h.*o"));
-- CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
--}
--
--TEST(RE2, PartialMatchN) {
-- RE2::Arg argv[2];
-- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
--
-- // 0 arg
-- EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
-- EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
--
-- // 1 arg
-- int i;
-- argv[0] = &i;
-- EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
-- EXPECT_EQ(1001, i);
-- EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
--
-- // Multi-arg
-- string s;
-- argv[1] = &s;
-- EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
-- EXPECT_EQ(42, i);
-- EXPECT_EQ("life", s);
-- EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
--}
--
--TEST(RE2, FullMatchZeroArg) {
-- // Zero-arg
-- CHECK(RE2::FullMatch("1001", "\\d+"));
--}
--
--TEST(RE2, FullMatchOneArg) {
-- int i;
--
-- // Single-arg
-- CHECK(RE2::FullMatch("1001", "(\\d+)", &i));
-- CHECK_EQ(i, 1001);
-- CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
-- CHECK_EQ(i, -123);
-- CHECK(!RE2::FullMatch("10", "()\\d+", &i));
-- CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
-- "(\\d+)", &i));
--}
--
--TEST(RE2, FullMatchIntegerArg) {
-- int i;
--
-- // Digits surrounding integer-arg
-- CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
-- CHECK_EQ(i, 23);
-- CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
-- CHECK_EQ(i, 1);
-- CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
-- CHECK_EQ(i, -1);
-- CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
-- CHECK_EQ(i, 1);
-- CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
-- CHECK_EQ(i, -1);
--}
--
--TEST(RE2, FullMatchStringArg) {
-- string s;
-- // String-arg
-- CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
-- CHECK_EQ(s, string("ell"));
--}
--
--TEST(RE2, FullMatchStringPieceArg) {
-- int i;
-- // StringPiece-arg
-- StringPiece sp;
-- CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
-- CHECK_EQ(sp.size(), 4);
-- CHECK(memcmp(sp.data(), "ruby", 4) == 0);
-- CHECK_EQ(i, 1234);
--}
--
--TEST(RE2, FullMatchMultiArg) {
-- int i;
-- string s;
-- // Multi-arg
-- CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
-- CHECK_EQ(s, string("ruby"));
-- CHECK_EQ(i, 1234);
--}
--
--TEST(RE2, FullMatchN) {
-- RE2::Arg argv[2];
-- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
--
-- // 0 arg
-- EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
-- EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
--
-- // 1 arg
-- int i;
-- argv[0] = &i;
-- EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
-- EXPECT_EQ(1001, i);
-- EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
--
-- // Multi-arg
-- string s;
-- argv[1] = &s;
-- EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
-- EXPECT_EQ(42, i);
-- EXPECT_EQ("life", s);
-- EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
--}
--
--TEST(RE2, FullMatchIgnoredArg) {
-- int i;
-- string s;
-- // Ignored arg
-- CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
-- CHECK_EQ(s, string("ruby"));
-- CHECK_EQ(i, 1234);
--}
--
--TEST(RE2, FullMatchTypedNullArg) {
-- string s;
--
-- // Ignore non-void* NULL arg
-- CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
-- CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
-- CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
-- CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
-- CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
-- CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
-- CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
--
-- // Fail on non-void* NULL arg if the match doesn't parse for the given type.
-- CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
-- CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
-- CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
-- CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
-- CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
--}
--
--// Check that numeric parsing code does not read past the end of
--// the number being parsed.
--TEST(RE2, NULTerminated) {
-- char *v;
-- int x;
-- long pagesize = sysconf(_SC_PAGE_SIZE);
--
--#ifndef MAP_ANONYMOUS
--#define MAP_ANONYMOUS MAP_ANON
--#endif
-- v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
-- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
-- CHECK(v != reinterpret_cast<char*>(-1));
-- LOG(INFO) << "Memory at " << (void*)v;
-- CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
-- v[pagesize - 1] = '1';
--
-- x = 0;
-- CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
-- CHECK_EQ(x, 1);
--}
--
--TEST(RE2, FullMatchTypeTests) {
-- // Type tests
-- string zeros(100, '0');
-- {
-- char c;
-- CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
-- CHECK_EQ(c, 'H');
-- }
-- {
-- unsigned char c;
-- CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
-- CHECK_EQ(c, static_cast<unsigned char>('H'));
-- }
-- {
-- int16 v;
-- CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
-- CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767);
-- CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768);
-- CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
-- CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v));
-- }
-- {
-- uint16 v;
-- CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767);
-- CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535);
-- CHECK(!RE2::FullMatch("65536", "(\\d+)", &v));
-- }
-- {
-- int32 v;
-- static const int32 max = 0x7fffffff;
-- static const int32 min = -max - 1;
-- CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
-- CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max);
-- CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min);
-- CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
-- CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v));
--
-- CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
-- CHECK_EQ(v, max);
-- CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
-- CHECK_EQ(v, min);
--
-- CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
-- CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
-- CHECK_EQ(v, max);
-- CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
-- }
-- {
-- uint32 v;
-- static const uint32 max = 0xfffffffful;
-- CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
-- CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
-- CHECK(!RE2::FullMatch("-1", "(\\d+)", &v));
--
-- CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
-- }
-- {
-- int64 v;
-- static const int64 max = 0x7fffffffffffffffull;
-- static const int64 min = -max - 1;
-- char buf[32];
--
-- CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
--
-- snprintf(buf, sizeof(buf), "%lld", max);
-- CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
--
-- snprintf(buf, sizeof(buf), "%lld", min);
-- CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min);
--
-- snprintf(buf, sizeof(buf), "%lld", max);
-- assert(buf[strlen(buf)-1] != '9');
-- buf[strlen(buf)-1]++;
-- CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
--
-- snprintf(buf, sizeof(buf), "%lld", min);
-- assert(buf[strlen(buf)-1] != '9');
-- buf[strlen(buf)-1]++;
-- CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
-- }
-- {
-- uint64 v;
-- int64 v2;
-- static const uint64 max = 0xffffffffffffffffull;
-- char buf[32];
--
-- CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
--
-- snprintf(buf, sizeof(buf), "%llu", max);
-- CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
--
-- assert(buf[strlen(buf)-1] != '9');
-- buf[strlen(buf)-1]++;
-- CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
-- }
--}
--
--TEST(RE2, FloatingPointFullMatchTypes) {
-- string zeros(100, '0');
-- {
-- float v;
-- CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
-- CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23));
--
-- CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
-- CHECK_EQ(v, float(1e23));
--
-- // 6700000000081920.1 is an edge case.
-- // 6700000000081920 is exactly halfway between
-- // two float32s, so the .1 should make it round up.
-- // However, the .1 is outside the precision possible with
-- // a float64: the nearest float64 is 6700000000081920.
-- // So if the code uses strtod and then converts to float32,
-- // round-to-even will make it round down instead of up.
-- // To pass the test, the parser must call strtof directly.
-- // This test case is carefully chosen to use only a 17-digit
-- // number, since C does not guarantee to get the correctly
-- // rounded answer for strtod and strtof unless the input is
-- // short.
-- CHECK(RE2::FullMatch("0.1", "(.*)", &v));
-- CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
-- CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
-- CHECK_EQ(v, 6700000000081920.1f)
-- << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
-- }
-- {
-- double v;
-- CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
-- CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
-- CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23);
-- CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
-- CHECK_EQ(v, double(1e23));
--
-- CHECK(RE2::FullMatch("0.1", "(.*)", &v));
-- CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
-- CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
-- CHECK_EQ(v, 1.0000000596046448)
-- << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
-- }
--}
--
--TEST(RE2, FullMatchAnchored) {
-- int i;
-- // Check that matching is fully anchored
-- CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i));
-- CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i));
-- CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001);
-- CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001);
--}
--
--TEST(RE2, FullMatchBraces) {
-- // Braces
-- CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
-- CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
-- CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
--}
--
--TEST(RE2, Complicated) {
-- // Complicated RE2
-- CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
-- CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
-- CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]"));
-- CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]"));
--}
--
--TEST(RE2, FullMatchEnd) {
-- // Check full-match handling (needs '$' tacked on internally)
-- CHECK(RE2::FullMatch("fo", "fo|foo"));
-- CHECK(RE2::FullMatch("foo", "fo|foo"));
-- CHECK(RE2::FullMatch("fo", "fo|foo$"));
-- CHECK(RE2::FullMatch("foo", "fo|foo$"));
-- CHECK(RE2::FullMatch("foo", "foo$"));
-- CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
-- CHECK(!RE2::FullMatch("fox", "fo|bar"));
--
-- // Uncomment the following if we change the handling of '$' to
-- // prevent it from matching a trailing newline
-- if (false) {
-- // Check that we don't get bitten by pcre's special handling of a
-- // '\n' at the end of the string matching '$'
-- CHECK(!RE2::PartialMatch("foo\n", "foo$"));
-- }
--}
--
--TEST(RE2, FullMatchArgCount) {
-- // Number of args
-- int a[16];
-- CHECK(RE2::FullMatch("", ""));
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("1",
-- "(\\d){1}",
-- &a[0]));
-- CHECK_EQ(a[0], 1);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("12",
-- "(\\d)(\\d)",
-- &a[0], &a[1]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("123",
-- "(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("1234",
-- "(\\d)(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2], &a[3]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
-- CHECK_EQ(a[3], 4);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("12345",
-- "(\\d)(\\d)(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2], &a[3],
-- &a[4]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
-- CHECK_EQ(a[3], 4);
-- CHECK_EQ(a[4], 5);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("123456",
-- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2], &a[3],
-- &a[4], &a[5]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
-- CHECK_EQ(a[3], 4);
-- CHECK_EQ(a[4], 5);
-- CHECK_EQ(a[5], 6);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("1234567",
-- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2], &a[3],
-- &a[4], &a[5], &a[6]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
-- CHECK_EQ(a[3], 4);
-- CHECK_EQ(a[4], 5);
-- CHECK_EQ(a[5], 6);
-- CHECK_EQ(a[6], 7);
--
-- memset(a, 0, sizeof(0));
-- CHECK(RE2::FullMatch("1234567890123456",
-- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
-- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
-- &a[0], &a[1], &a[2], &a[3],
-- &a[4], &a[5], &a[6], &a[7],
-- &a[8], &a[9], &a[10], &a[11],
-- &a[12], &a[13], &a[14], &a[15]));
-- CHECK_EQ(a[0], 1);
-- CHECK_EQ(a[1], 2);
-- CHECK_EQ(a[2], 3);
-- CHECK_EQ(a[3], 4);
-- CHECK_EQ(a[4], 5);
-- CHECK_EQ(a[5], 6);
-- CHECK_EQ(a[6], 7);
-- CHECK_EQ(a[7], 8);
-- CHECK_EQ(a[8], 9);
-- CHECK_EQ(a[9], 0);
-- CHECK_EQ(a[10], 1);
-- CHECK_EQ(a[11], 2);
-- CHECK_EQ(a[12], 3);
-- CHECK_EQ(a[13], 4);
-- CHECK_EQ(a[14], 5);
-- CHECK_EQ(a[15], 6);
--}
--
--TEST(RE2, Accessors) {
-- // Check the pattern() accessor
-- {
-- const string kPattern = "http://([^/]+)/.*";
-- const RE2 re(kPattern);
-- CHECK_EQ(kPattern, re.pattern());
-- }
--
-- // Check RE2 error field.
-- {
-- RE2 re("foo");
-- CHECK(re.error().empty()); // Must have no error
-- CHECK(re.ok());
-- CHECK(re.error_code() == RE2::NoError);
-- }
--}
--
--TEST(RE2, UTF8) {
-- // Check UTF-8 handling
-- // Three Japanese characters (nihongo)
-- const char utf8_string[] = {
-- 0xe6, 0x97, 0xa5, // 65e5
-- 0xe6, 0x9c, 0xac, // 627c
-- 0xe8, 0xaa, 0x9e, // 8a9e
-- 0
-- };
-- const char utf8_pattern[] = {
-- '.',
-- 0xe6, 0x9c, 0xac, // 627c
-- '.',
-- 0
-- };
--
-- // Both should match in either mode, bytes or UTF-8
-- RE2 re_test1(".........", RE2::Latin1);
-- CHECK(RE2::FullMatch(utf8_string, re_test1));
-- RE2 re_test2("...");
-- CHECK(RE2::FullMatch(utf8_string, re_test2));
--
-- // Check that '.' matches one byte or UTF-8 character
-- // according to the mode.
-- string s;
-- RE2 re_test3("(.)", RE2::Latin1);
-- CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
-- CHECK_EQ(s, string("\xe6"));
-- RE2 re_test4("(.)");
-- CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
-- CHECK_EQ(s, string("\xe6\x97\xa5"));
--
-- // Check that string matches itself in either mode
-- RE2 re_test5(utf8_string, RE2::Latin1);
-- CHECK(RE2::FullMatch(utf8_string, re_test5));
-- RE2 re_test6(utf8_string);
-- CHECK(RE2::FullMatch(utf8_string, re_test6));
--
-- // Check that pattern matches string only in UTF8 mode
-- RE2 re_test7(utf8_pattern, RE2::Latin1);
-- CHECK(!RE2::FullMatch(utf8_string, re_test7));
-- RE2 re_test8(utf8_pattern);
-- CHECK(RE2::FullMatch(utf8_string, re_test8));
--}
--
--TEST(RE2, UngreedyUTF8) {
-- // Check that ungreedy, UTF8 regular expressions don't match when they
-- // oughtn't -- see bug 82246.
-- {
-- // This code always worked.
-- const char* pattern = "\\w+X";
-- const string target = "a aX";
-- RE2 match_sentence(pattern, RE2::Latin1);
-- RE2 match_sentence_re(pattern);
--
-- CHECK(!RE2::FullMatch(target, match_sentence));
-- CHECK(!RE2::FullMatch(target, match_sentence_re));
-- }
-- {
-- const char* pattern = "(?U)\\w+X";
-- const string target = "a aX";
-- RE2 match_sentence(pattern, RE2::Latin1);
-- CHECK_EQ(match_sentence.error(), "");
-- RE2 match_sentence_re(pattern);
--
-- CHECK(!RE2::FullMatch(target, match_sentence));
-- CHECK(!RE2::FullMatch(target, match_sentence_re));
-- }
--}
--
--TEST(RE2, Rejects) {
-- { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); }
-- {
-- RE2 re("a[x", RE2::Quiet);
-- CHECK(!re.ok());
-- }
-- {
-- RE2 re("a[z-a]", RE2::Quiet);
-- CHECK(!re.ok());
-- }
-- {
-- RE2 re("a[[:foobar:]]", RE2::Quiet);
-- CHECK(!re.ok());
-- }
-- {
-- RE2 re("a(b", RE2::Quiet);
-- CHECK(!re.ok());
-- }
-- {
-- RE2 re("a\\", RE2::Quiet);
-- CHECK(!re.ok());
-- }
--}
--
--TEST(RE2, NoCrash) {
-- // Test that using a bad regexp doesn't crash.
-- {
-- RE2 re("a\\", RE2::Quiet);
-- CHECK(!re.ok());
-- CHECK(!RE2::PartialMatch("a\\b", re));
-- }
--
-- // Test that using an enormous regexp doesn't crash
-- {
-- RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
-- CHECK(!re.ok());
-- CHECK(!RE2::PartialMatch("aaa", re));
-- }
--
-- // Test that a crazy regexp still compiles and runs.
-- {
-- RE2 re(".{512}x", RE2::Quiet);
-- CHECK(re.ok());
-- string s;
-- s.append(515, 'c');
-- s.append("x");
-- CHECK(RE2::PartialMatch(s, re));
-- }
--}
--
--TEST(RE2, Recursion) {
-- // Test that recursion is stopped.
-- // This test is PCRE-legacy -- there's no recursion in RE2.
-- int bytes = 15 * 1024; // enough to crash PCRE
-- TestRecursion(bytes, ".");
-- TestRecursion(bytes, "a");
-- TestRecursion(bytes, "a.");
-- TestRecursion(bytes, "ab.");
-- TestRecursion(bytes, "abc.");
--}
--
--TEST(RE2, BigCountedRepetition) {
-- // Test that counted repetition works, given tons of memory.
-- RE2::Options opt;
-- opt.set_max_mem(256<<20);
--
-- RE2 re(".{512}x", opt);
-- CHECK(re.ok());
-- string s;
-- s.append(515, 'c');
-- s.append("x");
-- CHECK(RE2::PartialMatch(s, re));
--}
--
--TEST(RE2, DeepRecursion) {
-- // Test for deep stack recursion. This would fail with a
-- // segmentation violation due to stack overflow before pcre was
-- // patched.
-- // Again, a PCRE legacy test. RE2 doesn't recurse.
-- string comment("x*");
-- string a(131072, 'a');
-- comment += a;
-- comment += "*x";
-- RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
-- CHECK(RE2::FullMatch(comment, re));
--}
--
--// Suggested by Josh Hyman. Failed when SearchOnePass was
--// not implementing case-folding.
--TEST(CaseInsensitive, MatchAndConsume) {
-- string result;
-- string text = "A fish named *Wanda*";
-- StringPiece sp(text);
--
-- EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
-- EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
--}
--
--// RE2 should permit implicit conversions from string, StringPiece, const char*,
--// and C string literals.
--TEST(RE2, ImplicitConversions) {
-- string re_string(".");
-- StringPiece re_stringpiece(".");
-- const char* re_cstring = ".";
-- EXPECT_TRUE(RE2::PartialMatch("e", re_string));
-- EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
-- EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
-- EXPECT_TRUE(RE2::PartialMatch("e", "."));
--}
--
--// Bugs introduced by 8622304
--TEST(RE2, CL8622304) {
-- // reported by ingow
-- string dir;
-- EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
-- EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
--
-- // reported by jacobsa
-- string key, val;
-- EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
-- "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
-- &key,
-- &val));
-- EXPECT_EQ(key, "bar");
-- EXPECT_EQ(val, "1,0x2F,030,4,5");
--}
--
--
--// Check that RE2 returns correct regexp pieces on error.
--// In particular, make sure it returns whole runes
--// and that it always reports invalid UTF-8.
--// Also check that Perl error flag piece is big enough.
--static struct ErrorTest {
-- const char *regexp;
-- const char *error;
--} error_tests[] = {
-- { "ab\\αcd", "\\α" },
-- { "ef\\x☺01", "\\x☺0" },
-- { "gh\\x1☺01", "\\x1☺" },
-- { "ij\\x1", "\\x1" },
-- { "kl\\x", "\\x" },
-- { "uv\\x{0000☺}", "\\x{0000☺" },
-- { "wx\\p{ABC", "\\p{ABC" },
-- { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
-- { "aa(?sm☺i", "(?sm☺" },
-- { "bb[abc", "[abc" },
--
-- { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
-- { "op\377qr", "" },
-- { "st\\x{00000\377", "" },
-- { "zz\\p{\377}", "" },
-- { "zz\\x{00\377}", "" },
-- { "zz(?P<name\377>abc)", "" },
--};
--TEST(RE2, ErrorArgs) {
-- for (int i = 0; i < arraysize(error_tests); i++) {
-- RE2 re(error_tests[i].regexp, RE2::Quiet);
-- EXPECT_FALSE(re.ok());
-- EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
-- }
--}
--
--// Check that "never match \n" mode never matches \n.
--static struct NeverTest {
-- const char* regexp;
-- const char* text;
-- const char* match;
--} never_tests[] = {
-- { "(.*)", "abc\ndef\nghi\n", "abc" },
-- { "(?s)(abc.*def)", "abc\ndef\n", NULL },
-- { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
-- { "(abc[^x]*def)", "abc\ndef\n", NULL },
-- { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
--};
--TEST(RE2, NeverNewline) {
-- RE2::Options opt;
-- opt.set_never_nl(true);
-- for (int i = 0; i < arraysize(never_tests); i++) {
-- const NeverTest& t = never_tests[i];
-- RE2 re(t.regexp, opt);
-- if (t.match == NULL) {
-- EXPECT_FALSE(re.PartialMatch(t.text, re));
-- } else {
-- StringPiece m;
-- EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
-- EXPECT_EQ(m, t.match);
-- }
-- }
--}
--
--// Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
--// Triggered by a failed DFA search falling back to Bitstate when
--// using Match with a NULL submatch set. Bitstate tried to read
--// the submatch[0] entry even if nsubmatch was 0.
--TEST(RE2, BitstateCaptureBug) {
-- RE2::Options opt;
-- opt.set_max_mem(20000);
-- RE2 re("(_________$)", opt);
-- StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
-- EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
--}
--
--// C++ version of bug 609710.
--TEST(RE2, UnicodeClasses) {
-- const string str = "ABCDEFGHI譚永鋒";
-- string a, b, c;
--
-- EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
-- EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
-- EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
-- EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
-- EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
-- EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
--
-- EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
-- EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
-- EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
-- EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
-- EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
-- EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
--
-- EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
-- EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
-- EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
-- EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
-- EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
-- EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
--
-- EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
-- EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
-- EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
-- EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
-- EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
-- EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
--
-- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
-- EXPECT_EQ("A", a);
-- EXPECT_EQ("B", b);
-- EXPECT_EQ("C", c);
--
-- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
-- EXPECT_EQ("A", a);
-- EXPECT_EQ("B", b);
-- EXPECT_EQ("C", c);
--
-- EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
--
-- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
-- EXPECT_EQ("A", a);
-- EXPECT_EQ("B", b);
-- EXPECT_EQ("C", c);
--
-- EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
--
-- EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
-- EXPECT_EQ("譚", a);
-- EXPECT_EQ("永", b);
-- EXPECT_EQ("鋒", c);
--}
--
--// Bug reported by saito. 2009/02/17
--TEST(RE2, NullVsEmptyString) {
-- RE2 re2(".*");
-- StringPiece v1("");
-- EXPECT_TRUE(RE2::FullMatch(v1, re2));
--
-- StringPiece v2;
-- EXPECT_TRUE(RE2::FullMatch(v2, re2));
--}
--
--// Issue 1816809
--TEST(RE2, Bug1816809) {
-- RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
-- StringPiece piece("llx-3;llx4");
-- string x;
-- EXPECT_TRUE(RE2::Consume(&piece, re, &x));
--}
--
--// Issue 3061120
--TEST(RE2, Bug3061120) {
-- RE2 re("(?i)\\W");
-- EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
-- EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
-- EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
--}
--
--TEST(RE2, CapturingGroupNames) {
-- // Opening parentheses annotated with group IDs:
-- // 12 3 45 6 7
-- RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
-- EXPECT_TRUE(re.ok());
-- const map<int, string>& have = re.CapturingGroupNames();
-- map<int, string> want;
-- want[3] = "G2";
-- want[6] = "G2";
-- want[7] = "G1";
-- EXPECT_EQ(want, have);
--}
--
--TEST(RE2, RegexpToStringLossOfAnchor) {
-- EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
-- EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
-- EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
-- EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/regexp_benchmark.cc b/re2/re2/testing/regexp_benchmark.cc
-deleted file mode 100644
-index ca7627f..0000000
---- a/re2/re2/testing/regexp_benchmark.cc
-+++ /dev/null
-@@ -1,1461 +0,0 @@
--// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Benchmarks for regular expression implementations.
--
--#include "util/test.h"
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--#include "util/pcre.h"
--#include "util/benchmark.h"
--
--namespace re2 {
--void Test();
--void MemoryUsage();
--} // namespace re2
--
--typedef testing::MallocCounter MallocCounter;
--
--namespace re2 {
--
--void Test() {
-- Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- const char* text = "650-253-0001";
-- StringPiece sp[4];
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- CHECK_EQ(sp[0], "650-253-0001");
-- CHECK_EQ(sp[1], "650");
-- CHECK_EQ(sp[2], "253");
-- CHECK_EQ(sp[3], "0001");
-- delete prog;
-- re->Decref();
-- LOG(INFO) << "test passed\n";
--}
--
--void MemoryUsage() {
-- const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
-- const char* text = "650-253-0001";
-- {
-- MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
-- // because LOG(INFO) might do a big allocation before they get evaluated.
-- fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- mc.Reset();
--
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- mc.Reset();
--
-- StringPiece sp[4];
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- delete prog;
-- re->Decref();
-- }
--
-- {
-- MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
--
-- PCRE re(regexp, PCRE::UTF8);
-- fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- PCRE::FullMatch(text, re);
-- fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- }
--
-- {
-- MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
--
-- PCRE* re = new PCRE(regexp, PCRE::UTF8);
-- fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- PCRE::FullMatch(text, *re);
-- fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- delete re;
-- }
--
-- {
-- MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
--
-- RE2 re(regexp);
-- fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- RE2::FullMatch(text, re);
-- fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
-- }
--
-- fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
-- static_cast<int>(sizeof(PCRE)),
-- static_cast<int>(sizeof(RE2)),
-- static_cast<int>(sizeof(Prog)),
-- static_cast<int>(sizeof(Prog::Inst)));
--}
--
--// Regular expression implementation wrappers.
--// Defined at bottom of file, but they are repetitive
--// and not interesting.
--
--typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match);
--
--SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
-- SearchPCRE, SearchRE2,
-- SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
-- SearchCachedPCRE, SearchCachedRE2;
--
--typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
--
--ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
-- Parse1PCRE, Parse1RE2,
-- Parse1Backtrack,
-- Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
-- Parse1CachedPCRE, Parse1CachedRE2,
-- Parse1CachedBacktrack;
--
--ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
-- Parse3PCRE, Parse3RE2,
-- Parse3Backtrack,
-- Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
-- Parse3CachedPCRE, Parse3CachedRE2,
-- Parse3CachedBacktrack;
--
--ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
--
--ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
--
--// Benchmark: failed search for regexp in random text.
--
--// Generate random text that won't contain the search string,
--// to test worst-case search behavior.
--void MakeText(string* text, int nbytes) {
-- text->resize(nbytes);
-- srand(0);
-- for (int i = 0; i < nbytes; i++) {
-- if (!rand()%30)
-- (*text)[i] = '\n';
-- else
-- (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;
-- }
--}
--
--// Makes text of size nbytes, then calls run to search
--// the text for regexp iters times.
--void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
-- StopBenchmarkTiming();
-- string s;
-- MakeText(&s, nbytes);
-- BenchmarkMemoryUsage();
-- StartBenchmarkTiming();
-- search(iters, regexp, s, Prog::kUnanchored, false);
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
--}
--
--// These two are easy because they start with an A,
--// giving the search loop something to memchr for.
--#define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
--#define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
--
--// This is a little harder, since it starts with a character class
--// and thus can't be memchr'ed. Could look for ABC and work backward,
--// but no one does that.
--#define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
--
--// This is a fair amount harder, because of the leading [ -~]*.
--// A bad backtracking implementation will take O(text^2) time to
--// figure out there's no match.
--#define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
--
--// This stresses engines that are trying to track parentheses.
--#define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
-- "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
--
--void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCachedDFA); }
--void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCachedNFA); }
--void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCachedPCRE); }
--void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCachedDFA); }
--void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCachedNFA); }
--void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCachedPCRE); }
--void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedDFA); }
--void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedNFA); }
--void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCachedPCRE); }
--void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDFA); }
--void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNFA); }
--void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPCRE); }
--void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); }
--void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); }
--void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); }
--void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
-- StopBenchmarkTiming();
-- string s;
-- s.append(nbytes/2, 'x');
-- string regexp = "^" + s + ".*$";
-- string t;
-- MakeText(&t, nbytes/2);
-- s += t;
-- BenchmarkMemoryUsage();
-- StartBenchmarkTiming();
-- search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
--}
--
--void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); }
--void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCachedNFA); }
--void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCachedPCRE); }
--void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
--
--// Benchmark: FindAndConsume
--void FindAndConsume(int iters, int nbytes) {
-- StopBenchmarkTiming();
-- string s;
-- MakeText(&s, nbytes);
-- s.append("Hello World");
-- StartBenchmarkTiming();
-- RE2 re("((Hello World))");
-- for (int i = 0; i < iters; i++) {
-- StringPiece t = s;
-- StringPiece u;
-- CHECK(RE2::FindAndConsume(&t, re, &u));
-- CHECK_EQ(u, "Hello World");
-- }
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
--}
--
--BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--// Benchmark: successful anchored search.
--
--void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
-- string s;
-- MakeText(&s, nbytes);
-- BenchmarkMemoryUsage();
-- search(iters, regexp, s, Prog::kAnchored, true);
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
--}
--
--// Unambiguous search (RE2 can use OnePass).
--
--void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); }
--void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
--void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); }
--void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); }
--
--BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
--void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
--void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
--void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
--
--// Ambiguous search (RE2 cannot use OnePass).
--
--void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); }
--void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); }
--void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); }
--void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); }
--
--BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
--
--void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
--void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
--void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
--
--BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--// Benchmark: use regexp to find phone number.
--
--void SearchDigits(int iters, SearchImpl* search) {
-- const char *text = "650-253-0001";
-- int len = strlen(text);
-- BenchmarkMemoryUsage();
-- search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
-- StringPiece(text, len), Prog::kAnchored, true);
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); }
--void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); }
--void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); }
--void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); }
--void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); }
--void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); }
--
--BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
--
--// Benchmark: use regexp to parse digit fields in phone number.
--
--void Parse3Digits(int iters,
-- void (*parse3)(int, const char*, const StringPiece&)) {
-- BenchmarkMemoryUsage();
-- parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); }
--void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); }
--void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); }
--void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); }
--void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); }
--void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); }
--
--BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
--
--void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); }
--void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass); }
--void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE); }
--void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); }
--void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktrack); }
--void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState); }
--
--BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
--
--void Parse3DigitDs(int iters,
-- void (*parse3)(int, const char*, const StringPiece&)) {
-- BenchmarkMemoryUsage();
-- parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); }
--void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); }
--void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); }
--void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); }
--void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
--void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
--
--BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
--
--void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA); }
--void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePass); }
--void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE); }
--void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2); }
--void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
--void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
--
--BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
--
--// Benchmark: splitting off leading number field.
--
--void Parse1Split(int iters,
-- void (*parse1)(int, const char*, const StringPiece&)) {
-- BenchmarkMemoryUsage();
-- parse1(iters, "[0-9]+-(.*)", "650-253-0001");
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); }
--void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); }
--void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); }
--void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); }
--void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); }
--
--BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
--
--void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); }
--void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass); }
--void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); }
--void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); }
--void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitState); }
--
--BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
--
--// Benchmark: splitting off leading number field but harder (ambiguous regexp).
--
--void Parse1SplitHard(int iters,
-- void (*run)(int, const char*, const StringPiece&)) {
-- BenchmarkMemoryUsage();
-- run(iters, "[0-9]+.(.*)", "650-253-0001");
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); }
--void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); }
--void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); }
--void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState); }
--
--#ifdef USEPCRE
--BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
--
--void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA); }
--void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCRE); }
--void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2); }
--void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1CachedBitState); }
--void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1CachedBacktrack); }
--
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
--BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
--
--// Benchmark: Parse1SplitHard, big text, small match.
--
--void Parse1SplitBig1(int iters,
-- void (*run)(int, const char*, const StringPiece&)) {
-- string s;
-- s.append(100000, 'x');
-- s.append("650-253-0001");
-- BenchmarkMemoryUsage();
-- run(iters, "[0-9]+.(.*)", s);
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
--void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1CachedRE2); }
--
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
--
--// Benchmark: Parse1SplitHard, big text, big match.
--
--void Parse1SplitBig2(int iters,
-- void (*run)(int, const char*, const StringPiece&)) {
-- string s;
-- s.append("650-253-");
-- s.append(100000, '0');
-- BenchmarkMemoryUsage();
-- run(iters, "[0-9]+.(.*)", s);
-- SetBenchmarkItemsProcessed(iters);
--}
--
--void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
--void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1CachedRE2); }
--
--#ifdef USEPCRE
--BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
--
--// Benchmark: measure time required to parse (but not execute)
--// a simple regular expression.
--
--void ParseRegexp(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- re->Decref();
-- }
--}
--
--void SimplifyRegexp(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Regexp* sre = re->Simplify();
-- CHECK(sre);
-- sre->Decref();
-- re->Decref();
-- }
--}
--
--void NullWalkRegexp(int iters, const string& regexp) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- for (int i = 0; i < iters; i++) {
-- re->NullWalk();
-- }
-- re->Decref();
--}
--
--void SimplifyCompileRegexp(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Regexp* sre = re->Simplify();
-- CHECK(sre);
-- Prog* prog = sre->CompileToProg(0);
-- CHECK(prog);
-- delete prog;
-- sre->Decref();
-- re->Decref();
-- }
--}
--
--void CompileRegexp(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- delete prog;
-- re->Decref();
-- }
--}
--
--void CompileToProg(int iters, const string& regexp) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- for (int i = 0; i < iters; i++) {
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- delete prog;
-- }
-- re->Decref();
--}
--
--void CompileByteMap(int iters, const string& regexp) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- for (int i = 0; i < iters; i++) {
-- prog->ComputeByteMap();
-- }
-- delete prog;
-- re->Decref();
--}
--
--void CompilePCRE(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- }
--}
--
--void CompileRE2(int iters, const string& regexp) {
-- for (int i = 0; i < iters; i++) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- }
--}
--
--void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
-- run(iters, regexp);
-- SetBenchmarkItemsProcessed(iters);
--}
--
--} // namespace re2
--
--DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
--
--namespace re2 {
--
--void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
--void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
--void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
--void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
--void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
--void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
--void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
--void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
--void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
--
--#ifdef USEPCRE
--BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
--BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
--
--
--// Makes text of size nbytes, then calls run to search
--// the text for regexp iters times.
--void SearchPhone(int iters, int nbytes, ParseImpl* search) {
-- StopBenchmarkTiming();
-- string s;
-- MakeText(&s, nbytes);
-- s.append("(650) 253-0001");
-- BenchmarkMemoryUsage();
-- StartBenchmarkTiming();
-- search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
--}
--
--void SearchPhone_CachedPCRE(int i, int n) {
-- SearchPhone(i, n, SearchParse2CachedPCRE);
--}
--void SearchPhone_CachedRE2(int i, int n) {
-- SearchPhone(i, n, SearchParse2CachedRE2);
--}
--
--#ifdef USEPCRE
--BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
--
--/*
--TODO(rsc): Make this work again.
--
--// Generates and returns a string over binary alphabet {0,1} that contains
--// all possible binary sequences of length n as subsequences. The obvious
--// brute force method would generate a string of length n * 2^n, but this
--// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
--// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
--static string DeBruijnString(int n) {
-- CHECK_LT(n, 8*sizeof(int));
-- CHECK_GT(n, 0);
--
-- vector<bool> did(1<<n);
-- for (int i = 0; i < 1<<n; i++)
-- did[i] = false;
--
-- string s;
-- for (int i = 0; i < n-1; i++)
-- s.append("0");
-- int bits = 0;
-- int mask = (1<<n) - 1;
-- for (int i = 0; i < (1<<n); i++) {
-- bits <<= 1;
-- bits &= mask;
-- if (!did[bits|1]) {
-- bits |= 1;
-- s.append("1");
-- } else {
-- s.append("0");
-- }
-- CHECK(!did[bits]);
-- did[bits] = true;
-- }
-- return s;
--}
--
--void CacheFill(int iters, int n, SearchImpl *srch) {
-- string s = DeBruijnString(n+1);
-- string t;
-- for (int i = n+1; i < 20; i++) {
-- t = s + s;
-- swap(s, t);
-- }
-- srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
-- Prog::kUnanchored, true);
-- SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
--}
--
--void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
--void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
--void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
--void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
--
--// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
--// for the static BenchmarkRegisterer, which makes it unusable inside
--// a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
--// to make the identifiers distinct (only possible when 'a' is a simple
--// expression like 2, not like 1+1).
--#define MY_BENCHMARK_WITH_ARG(n, a) \
-- bool __benchmark_ ## n ## a = \
-- (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
--
--#define DO24(A, B) \
-- A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
-- A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
-- A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
-- A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
--
--DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
--DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
--DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
--DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
--
--#undef DO24
--#undef MY_BENCHMARK_WITH_ARG
--*/
--
--////////////////////////////////////////////////////////////////////////
--//
--// Implementation routines. Sad that there are so many,
--// but all the interfaces are slightly different.
--
--// Runs implementation to search for regexp in text, iters times.
--// Expect_match says whether the regexp should be found.
--// Anchored says whether to run an anchored search.
--
--void SearchDFA(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- bool failed = false;
-- CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
-- NULL, &failed, NULL),
-- expect_match);
-- CHECK(!failed);
-- delete prog;
-- re->Decref();
-- }
--}
--
--void SearchNFA(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- delete prog;
-- re->Decref();
-- }
--}
--
--void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- delete prog;
-- re->Decref();
-- }
--}
--
--void SearchBitState(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- delete prog;
-- re->Decref();
-- }
--}
--
--void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- if (anchor == Prog::kAnchored)
-- CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
-- else
-- CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
-- }
--}
--
--void SearchRE2(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- for (int i = 0; i < iters; i++) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- if (anchor == Prog::kAnchored)
-- CHECK_EQ(RE2::FullMatch(text, re), expect_match);
-- else
-- CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
-- }
--}
--
--// SearchCachedXXX is like SearchXXX but only does the
--// regexp parsing and compiling once. This lets us measure
--// search time without the per-regexp overhead.
--
--void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(1LL<<31);
-- CHECK(prog);
-- for (int i = 0; i < iters; i++) {
-- bool failed = false;
-- CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
-- Prog::kFirstMatch, NULL, &failed, NULL),
-- expect_match);
-- CHECK(!failed);
-- }
-- delete prog;
-- re->Decref();
--}
--
--void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- for (int i = 0; i < iters; i++) {
-- CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- }
-- delete prog;
-- re->Decref();
--}
--
--void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- for (int i = 0; i < iters; i++)
-- CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- delete prog;
-- re->Decref();
--}
--
--void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- for (int i = 0; i < iters; i++)
-- CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
-- expect_match);
-- delete prog;
-- re->Decref();
--}
--
--void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- if (anchor == Prog::kAnchored)
-- CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
-- else
-- CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
-- }
--}
--
--void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
-- Prog::Anchor anchor, bool expect_match) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- if (anchor == Prog::kAnchored)
-- CHECK_EQ(RE2::FullMatch(text, re), expect_match);
-- else
-- CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
-- }
--}
--
--
--// Runs implementation to full match regexp against text,
--// extracting three submatches. Expects match always.
--
--void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1, sp2, sp3;
-- CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
-- }
--}
--
--void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1, sp2, sp3;
-- CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
-- }
--}
--
--void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++) {
-- CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- }
-- delete prog;
-- re->Decref();
--}
--
--void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
--}
--
--void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
--}
--
--void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[4]; // 4 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
-- delete prog;
-- re->Decref();
--}
--
--void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1, sp2, sp3;
-- for (int i = 0; i < iters; i++) {
-- CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
-- }
--}
--
--void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1, sp2, sp3;
-- for (int i = 0; i < iters; i++) {
-- CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
-- }
--}
--
--
--// Runs implementation to full match regexp against text,
--// extracting three submatches. Expects match always.
--
--void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
-- }
--}
--
--void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1;
-- CHECK(PCRE::FullMatch(text, re, &sp1));
-- }
--}
--
--void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
-- for (int i = 0; i < iters; i++) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1;
-- CHECK(RE2::FullMatch(text, re, &sp1));
-- }
--}
--
--void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++) {
-- CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- }
-- delete prog;
-- re->Decref();
--}
--
--void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- CHECK(prog->IsOnePass());
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
--}
--
--void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
--}
--
--void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
-- Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
-- CHECK(re);
-- Prog* prog = re->CompileToProg(0);
-- CHECK(prog);
-- StringPiece sp[2]; // 2 because sp[0] is whole match.
-- for (int i = 0; i < iters; i++)
-- CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
-- delete prog;
-- re->Decref();
--}
--
--void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1;
-- for (int i = 0; i < iters; i++) {
-- CHECK(PCRE::FullMatch(text, re, &sp1));
-- }
--}
--
--void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- StringPiece sp1;
-- for (int i = 0; i < iters; i++) {
-- CHECK(RE2::FullMatch(text, re, &sp1));
-- }
--}
--
--void SearchParse2CachedPCRE(int iters, const char* regexp,
-- const StringPiece& text) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- StringPiece sp1, sp2;
-- CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
-- }
--}
--
--void SearchParse2CachedRE2(int iters, const char* regexp,
-- const StringPiece& text) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- StringPiece sp1, sp2;
-- CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
-- }
--}
--
--void SearchParse1CachedPCRE(int iters, const char* regexp,
-- const StringPiece& text) {
-- PCRE re(regexp, PCRE::UTF8);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- StringPiece sp1;
-- CHECK(PCRE::PartialMatch(text, re, &sp1));
-- }
--}
--
--void SearchParse1CachedRE2(int iters, const char* regexp,
-- const StringPiece& text) {
-- RE2 re(regexp);
-- CHECK_EQ(re.error(), "");
-- for (int i = 0; i < iters; i++) {
-- StringPiece sp1;
-- CHECK(RE2::PartialMatch(text, re, &sp1));
-- }
--}
--
--void EmptyPartialMatchPCRE(int n) {
-- PCRE re("");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch("", re);
-- }
--}
--
--void EmptyPartialMatchRE2(int n) {
-- RE2 re("");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch("", re);
-- }
--}
--#ifdef USEPCRE
--BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
--
--void SimplePartialMatchPCRE(int n) {
-- PCRE re("abcdefg");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch("abcdefg", re);
-- }
--}
--
--void SimplePartialMatchRE2(int n) {
-- RE2 re("abcdefg");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch("abcdefg", re);
-- }
--}
--#ifdef USEPCRE
--BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
--
--static string http_text =
-- "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
-- "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
--
--void HTTPPartialMatchPCRE(int n) {
-- StringPiece a;
-- PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch(http_text, re, &a);
-- }
--}
--
--void HTTPPartialMatchRE2(int n) {
-- StringPiece a;
-- RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch(http_text, re, &a);
-- }
--}
--
--#ifdef USEPCRE
--BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
--
--static string http_smalltext =
-- "GET /abc HTTP/1.1";
--
--void SmallHTTPPartialMatchPCRE(int n) {
-- StringPiece a;
-- PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch(http_text, re, &a);
-- }
--}
--
--void SmallHTTPPartialMatchRE2(int n) {
-- StringPiece a;
-- RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch(http_text, re, &a);
-- }
--}
--
--#ifdef USEPCRE
--BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
--
--void DotMatchPCRE(int n) {
-- StringPiece a;
-- PCRE re("(?-s)^(.+)");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch(http_text, re, &a);
-- }
--}
--
--void DotMatchRE2(int n) {
-- StringPiece a;
-- RE2 re("(?-s)^(.+)");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch(http_text, re, &a);
-- }
--}
--
--#ifdef USEPCRE
--BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
--
--void ASCIIMatchPCRE(int n) {
-- StringPiece a;
-- PCRE re("(?-s)^([ -~]+)");
-- for (int i = 0; i < n; i++) {
-- PCRE::PartialMatch(http_text, re, &a);
-- }
--}
--
--void ASCIIMatchRE2(int n) {
-- StringPiece a;
-- RE2 re("(?-s)^([ -~]+)");
-- for (int i = 0; i < n; i++) {
-- RE2::PartialMatch(http_text, re, &a);
-- }
--}
--
--#ifdef USEPCRE
--BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
--#endif
--BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
--
--void FullMatchPCRE(int iter, int n, const char *regexp) {
-- StopBenchmarkTiming();
-- string s;
-- MakeText(&s, n);
-- s += "ABCDEFGHIJ";
-- BenchmarkMemoryUsage();
-- PCRE re(regexp);
-- StartBenchmarkTiming();
-- for (int i = 0; i < iter; i++)
-- CHECK(PCRE::FullMatch(s, re));
-- SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
--}
--
--void FullMatchRE2(int iter, int n, const char *regexp) {
-- StopBenchmarkTiming();
-- string s;
-- MakeText(&s, n);
-- s += "ABCDEFGHIJ";
-- BenchmarkMemoryUsage();
-- RE2 re(regexp, RE2::Latin1);
-- StartBenchmarkTiming();
-- for (int i = 0; i < iter; i++)
-- CHECK(RE2::FullMatch(s, re));
-- SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
--}
--
--void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
--void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*"); }
--
--void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
--void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*$"); }
--
--void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
--void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
--
--#ifdef USEPCRE
--BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
--#endif
--BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
--
--#ifdef USEPCRE
--BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
--#endif
--BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
--
--#ifdef USEPCRE
--BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
--#endif
--BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
--
--} // namespace re2
-diff --git a/re2/re2/testing/regexp_generator.cc b/re2/re2/testing/regexp_generator.cc
-deleted file mode 100644
-index cf2db11..0000000
---- a/re2/re2/testing/regexp_generator.cc
-+++ /dev/null
-@@ -1,264 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression generator: generates all possible
--// regular expressions within parameters (see regexp_generator.h for details).
--
--// The regexp generator first generates a sequence of commands in a simple
--// postfix language. Each command in the language is a string,
--// like "a" or "%s*" or "%s|%s".
--//
--// To evaluate a command, enough arguments are popped from the value stack to
--// plug into the %s slots. Then the result is pushed onto the stack.
--// For example, the command sequence
--// a b %s%s c
--// results in the stack
--// ab c
--//
--// GeneratePostfix generates all possible command sequences.
--// Then RunPostfix turns each sequence into a regular expression
--// and passes the regexp to HandleRegexp.
--
--#include <string.h>
--#include <string>
--#include <stack>
--#include <vector>
--#include "util/test.h"
--#include "re2/testing/regexp_generator.h"
--
--namespace re2 {
--
--// Returns a vector of the egrep regexp operators.
--const vector<string>& RegexpGenerator::EgrepOps() {
-- static const char *ops[] = {
-- "%s%s",
-- "%s|%s",
-- "%s*",
-- "%s+",
-- "%s?",
-- "%s\\C*",
-- };
-- static vector<string> v(ops, ops + arraysize(ops));
-- return v;
--}
--
--RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
-- const vector<string>& atoms,
-- const vector<string>& ops)
-- : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
-- // Degenerate case.
-- if (atoms_.size() == 0)
-- maxatoms_ = 0;
-- if (ops_.size() == 0)
-- maxops_ = 0;
--}
--
--// Generates all possible regular expressions (within the parameters),
--// calling HandleRegexp for each one.
--void RegexpGenerator::Generate() {
-- vector<string> postfix;
-- GeneratePostfix(&postfix, 0, 0, 0);
--}
--
--// Generates random regular expressions, calling HandleRegexp for each one.
--void RegexpGenerator::GenerateRandom(int32 seed, int n) {
-- ACMRandom acm(seed);
-- acm_ = &acm;
--
-- for (int i = 0; i < n; i++) {
-- vector<string> postfix;
-- GenerateRandomPostfix(&postfix, 0, 0, 0);
-- }
--
-- acm_ = NULL;
--}
--
--// Counts and returns the number of occurrences of "%s" in s.
--static int CountArgs(const string& s) {
-- const char *p = s.c_str();
-- int n = 0;
-- while ((p = strstr(p, "%s")) != NULL) {
-- p += 2;
-- n++;
-- }
-- return n;
--}
--
--// Generates all possible postfix command sequences.
--// Each sequence is handed off to RunPostfix to generate a regular expression.
--// The arguments are:
--// post: the current postfix sequence
--// nstk: the number of elements that would be on the stack after executing
--// the sequence
--// ops: the number of operators used in the sequence
--// atoms: the number of atoms used in the sequence
--// For example, if post were ["a", "b", "%s%s", "c"],
--// then nstk = 2, ops = 1, atoms = 3.
--//
--// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
--//
--void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk,
-- int ops, int atoms) {
-- if (nstk == 1)
-- RunPostfix(*post);
--
-- // Early out: if used too many operators or can't
-- // get back down to a single expression on the stack
-- // using binary operators, give up.
-- if (ops + nstk - 1 > maxops_)
-- return;
--
-- // Add atoms if there is room.
-- if (atoms < maxatoms_) {
-- for (int i = 0; i < atoms_.size(); i++) {
-- post->push_back(atoms_[i]);
-- GeneratePostfix(post, nstk + 1, ops, atoms + 1);
-- post->pop_back();
-- }
-- }
--
-- // Add operators if there are enough arguments.
-- if (ops < maxops_) {
-- for (int i = 0; i < ops_.size(); i++) {
-- const string& fmt = ops_[i];
-- int nargs = CountArgs(fmt);
-- if (nargs <= nstk) {
-- post->push_back(fmt);
-- GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms);
-- post->pop_back();
-- }
-- }
-- }
--}
--
--// Generates a random postfix command sequence.
--// Stops and returns true once a single sequence has been generated.
--bool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
-- int ops, int atoms) {
-- for (;;) {
-- // Stop if we get to a single element, but only sometimes.
-- if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) {
-- RunPostfix(*post);
-- return true;
-- }
--
-- // Early out: if used too many operators or can't
-- // get back down to a single expression on the stack
-- // using binary operators, give up.
-- if (ops + nstk - 1 > maxops_)
-- return false;
--
-- // Add operators if there are enough arguments.
-- if (ops < maxops_ && acm_->Uniform(2) == 0) {
-- const string& fmt = ops_[acm_->Uniform(ops_.size())];
-- int nargs = CountArgs(fmt);
-- if (nargs <= nstk) {
-- post->push_back(fmt);
-- bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
-- ops + 1, atoms);
-- post->pop_back();
-- if (ret)
-- return true;
-- }
-- }
--
-- // Add atoms if there is room.
-- if (atoms < maxatoms_ && acm_->Uniform(2) == 0) {
-- post->push_back(atoms_[acm_->Uniform(atoms_.size())]);
-- bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
-- post->pop_back();
-- if (ret)
-- return true;
-- }
-- }
--}
--
--// Interprets the postfix command sequence to create a regular expression
--// passed to HandleRegexp. The results of operators like %s|%s are wrapped
--// in (?: ) to avoid needing to maintain a precedence table.
--void RegexpGenerator::RunPostfix(const vector<string>& post) {
-- stack<string> regexps;
-- for (int i = 0; i < post.size(); i++) {
-- switch (CountArgs(post[i])) {
-- default:
-- LOG(FATAL) << "Bad operator: " << post[i];
-- case 0:
-- regexps.push(post[i]);
-- break;
-- case 1: {
-- string a = regexps.top();
-- regexps.pop();
-- regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
-- break;
-- }
-- case 2: {
-- string b = regexps.top();
-- regexps.pop();
-- string a = regexps.top();
-- regexps.pop();
-- regexps.push("(?:" +
-- StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
-- ")");
-- break;
-- }
-- }
-- }
--
-- if (regexps.size() != 1) {
-- // Internal error - should never happen.
-- printf("Bad regexp program:\n");
-- for (int i = 0; i < post.size(); i++) {
-- printf(" %s\n", CEscape(post[i]).c_str());
-- }
-- printf("Stack after running program:\n");
-- while (!regexps.empty()) {
-- printf(" %s\n", CEscape(regexps.top()).c_str());
-- regexps.pop();
-- }
-- LOG(FATAL) << "Bad regexp program.";
-- }
--
-- HandleRegexp(regexps.top());
-- HandleRegexp("^(?:" + regexps.top() + ")$");
-- HandleRegexp("^(?:" + regexps.top() + ")");
-- HandleRegexp("(?:" + regexps.top() + ")$");
--}
--
--// Split s into an vector of strings, one for each UTF-8 character.
--vector<string> Explode(const StringPiece& s) {
-- vector<string> v;
--
-- for (const char *q = s.begin(); q < s.end(); ) {
-- const char* p = q;
-- Rune r;
-- q += chartorune(&r, q);
-- v.push_back(string(p, q - p));
-- }
--
-- return v;
--}
--
--// Split string everywhere a substring is found, returning
--// vector of pieces.
--vector<string> Split(const StringPiece& sep, const StringPiece& s) {
-- vector<string> v;
--
-- if (sep.size() == 0)
-- return Explode(s);
--
-- const char *p = s.begin();
-- for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
-- if (StringPiece(q, sep.size()) == sep) {
-- v.push_back(string(p, q - p));
-- p = q + sep.size();
-- q = p - 1; // -1 for ++ in loop
-- continue;
-- }
-- }
-- if (p < s.end())
-- v.push_back(string(p, s.end() - p));
-- return v;
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/regexp_generator.h b/re2/re2/testing/regexp_generator.h
-deleted file mode 100644
-index b4506f2..0000000
---- a/re2/re2/testing/regexp_generator.h
-+++ /dev/null
-@@ -1,70 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression generator: generates all possible
--// regular expressions within given parameters (see below for details).
--
--#ifndef RE2_TESTING_REGEXP_GENERATOR_H__
--#define RE2_TESTING_REGEXP_GENERATOR_H__
--
--#include <string>
--#include <vector>
--#include "util/random.h"
--#include "util/util.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--// Regular expression generator.
--//
--// Given a set of atom expressions like "a", "b", or "."
--// and operators like "%s*", generates all possible regular expressions
--// using at most maxbases base expressions and maxops operators.
--// For each such expression re, calls HandleRegexp(re).
--//
--// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
--//
--class RegexpGenerator {
-- public:
-- RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms,
-- const vector<string>& ops);
-- virtual ~RegexpGenerator() {}
--
-- // Generates all the regular expressions, calling HandleRegexp(re) for each.
-- void Generate();
--
-- // Generates n random regular expressions, calling HandleRegexp(re) for each.
-- void GenerateRandom(int32 seed, int n);
--
-- // Handles a regular expression. Must be provided by subclass.
-- virtual void HandleRegexp(const string& regexp) = 0;
--
-- // The egrep regexp operators: * + ? | and concatenation.
-- static const vector<string>& EgrepOps();
--
-- private:
-- void RunPostfix(const vector<string>& post);
-- void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits);
-- bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits);
--
-- int maxatoms_; // Maximum number of atoms allowed in expr.
-- int maxops_; // Maximum number of ops allowed in expr.
-- vector<string> atoms_; // Possible atoms.
-- vector<string> ops_; // Possible ops.
-- ACMRandom* acm_; // Random generator.
-- DISALLOW_EVIL_CONSTRUCTORS(RegexpGenerator);
--};
--
--// Helpers for preparing arguments to RegexpGenerator constructor.
--
--// Returns one string for each character in s.
--vector<string> Explode(const StringPiece& s);
--
--// Splits string everywhere sep is found, returning
--// vector of pieces.
--vector<string> Split(const StringPiece& sep, const StringPiece& s);
--
--} // namespace re2
--
--#endif // RE2_TESTING_REGEXP_GENERATOR_H__
-diff --git a/re2/re2/testing/regexp_test.cc b/re2/re2/testing/regexp_test.cc
-deleted file mode 100644
-index f317cbc..0000000
---- a/re2/re2/testing/regexp_test.cc
-+++ /dev/null
-@@ -1,81 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test parse.cc, dump.cc, and tostring.cc.
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--// Test that overflowed ref counts work.
--TEST(Regexp, BigRef) {
-- Regexp* re;
-- re = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
-- for (int i = 0; i < 100000; i++)
-- re->Incref();
-- for (int i = 0; i < 100000; i++)
-- re->Decref();
-- CHECK_EQ(re->Ref(), 1);
-- re->Decref();
--}
--
--// Test that very large Concats work.
--// Depends on overflowed ref counts working.
--TEST(Regexp, BigConcat) {
-- Regexp* x;
-- x = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
-- vector<Regexp*> v(90000, x); // ToString bails out at 100000
-- for (int i = 0; i < v.size(); i++)
-- x->Incref();
-- CHECK_EQ(x->Ref(), 1 + v.size()) << x->Ref();
-- Regexp* re = Regexp::Concat(&v[0], v.size(), Regexp::NoParseFlags);
-- CHECK_EQ(re->ToString(), string(v.size(), 'x'));
-- re->Decref();
-- CHECK_EQ(x->Ref(), 1) << x->Ref();
-- x->Decref();
--}
--
--TEST(Regexp, NamedCaptures) {
-- Regexp* x;
-- RegexpStatus status;
-- x = Regexp::Parse(
-- "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
-- EXPECT_TRUE(status.ok());
-- EXPECT_EQ(4, x->NumCaptures());
-- const map<string, int>* have = x->NamedCaptures();
-- EXPECT_TRUE(have != NULL);
-- EXPECT_EQ(2, have->size()); // there are only two named groups in
-- // the regexp: 'g1' and 'g2'.
-- map<string, int> want;
-- want["g1"] = 1;
-- want["g2"] = 3;
-- EXPECT_EQ(want, *have);
-- x->Decref();
-- delete have;
--}
--
--TEST(Regexp, CaptureNames) {
-- Regexp* x;
-- RegexpStatus status;
-- x = Regexp::Parse(
-- "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
-- EXPECT_TRUE(status.ok());
-- EXPECT_EQ(4, x->NumCaptures());
-- const map<int, string>* have = x->CaptureNames();
-- EXPECT_TRUE(have != NULL);
-- EXPECT_EQ(3, have->size());
-- map<int, string> want;
-- want[1] = "g1";
-- want[3] = "g2";
-- want[4] = "g1";
--
-- EXPECT_EQ(want, *have);
-- x->Decref();
-- delete have;
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/required_prefix_test.cc b/re2/re2/testing/required_prefix_test.cc
-deleted file mode 100644
-index 1f0b216..0000000
---- a/re2/re2/testing/required_prefix_test.cc
-+++ /dev/null
-@@ -1,67 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/test.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct PrefixTest {
-- const char* regexp;
-- bool return_value;
-- const char* prefix;
-- bool foldcase;
-- const char* suffix;
--};
--
--static PrefixTest tests[] = {
-- // If the regexp is missing a ^, there's no required prefix.
-- { "abc", false },
-- { "", false },
-- { "(?m)^", false },
--
-- // If the regexp immediately goes into
-- // something not a literal match, there's no required prefix.
-- { "^(abc)", false },
-- { "^a*", false },
--
-- // Otherwise, it should work.
-- { "^abc$", true, "abc", false, "(?-m:$)" },
-- { "^abc", "true", "abc", false, "" },
-- { "^(?i)abc", true, "abc", true, "" },
-- { "^abcd*", true, "abc", false, "d*" },
-- { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
-- { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
-- { "^☺abc", true, "☺abc", false, "" },
--};
--
--TEST(RequiredPrefix, SimpleTests) {
-- for (int i = 0; i < arraysize(tests); i++) {
-- const PrefixTest& t = tests[i];
-- for (int j = 0; j < 2; j++) {
-- Regexp::ParseFlags flags = Regexp::LikePerl;
-- if (j == 0)
-- flags = flags | Regexp::Latin1;
-- Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
-- CHECK(re) << " " << t.regexp;
-- string p;
-- bool f = false;
-- Regexp* s = NULL;
-- CHECK_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
-- << " " << t.regexp << " " << (j==0 ? "latin1" : "utf") << " " << re->Dump();
-- if (t.return_value) {
-- CHECK_EQ(p, string(t.prefix))
-- << " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
-- CHECK_EQ(f, t.foldcase)
-- << " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
-- CHECK_EQ(s->ToString(), string(t.suffix))
-- << " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
-- s->Decref();
-- }
-- re->Decref();
-- }
-- }
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/search_test.cc b/re2/re2/testing/search_test.cc
-deleted file mode 100644
-index 3ab2ae3..0000000
---- a/re2/re2/testing/search_test.cc
-+++ /dev/null
-@@ -1,325 +0,0 @@
--// Copyright 2006-2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <stdlib.h>
--#include <vector>
--#include "util/test.h"
--#include "re2/prog.h"
--#include "re2/regexp.h"
--#include "re2/testing/tester.h"
--#include "re2/testing/exhaustive_tester.h"
--
--namespace re2 {
--
--struct RegexpTest {
-- const char* regexp;
-- const char* text;
--};
--
--RegexpTest simple_tests[] = {
-- { "a", "a" },
-- { "a", "zyzzyva" },
-- { "a+", "aa" },
-- { "(a+|b)+", "ab" },
-- { "ab|cd", "xabcdx" },
-- { "h.*od?", "hello\ngoodbye\n" },
-- { "h.*o", "hello\ngoodbye\n" },
-- { "h.*o", "goodbye\nhello\n" },
-- { "h.*o", "hello world" },
-- { "h.*o", "othello, world" },
-- { "[^\\s\\S]", "aaaaaaa" },
-- { "a", "aaaaaaa" },
-- { "a*", "aaaaaaa" },
-- { "a*", "" },
-- { "a*", NULL },
-- { "ab|cd", "xabcdx" },
-- { "a", "cab" },
-- { "a*b", "cab" },
-- { "((((((((((((((((((((x))))))))))))))))))))", "x" },
-- { "[abcd]", "xxxabcdxxx" },
-- { "[^x]", "xxxabcdxxx" },
-- { "[abcd]+", "xxxabcdxxx" },
-- { "[^x]+", "xxxabcdxxx" },
-- { "(fo|foo)", "fo" },
-- { "(foo|fo)", "foo" },
--
-- { "aa", "aA" },
-- { "a", "Aa" },
-- { "a", "A" },
-- { "ABC", "abc" },
-- { "abc", "XABCY" },
-- { "ABC", "xabcy" },
--
-- // Make sure ^ and $ work.
-- // The pathological cases didn't work
-- // in the original grep code.
-- { "foo|bar|[A-Z]", "foo" },
-- { "^(foo|bar|[A-Z])", "foo" },
-- { "(foo|bar|[A-Z])$", "foo\n" },
-- { "(foo|bar|[A-Z])$", "foo" },
-- { "^(foo|bar|[A-Z])$", "foo\n" },
-- { "^(foo|bar|[A-Z])$", "foo" },
-- { "^(foo|bar|[A-Z])$", "bar" },
-- { "^(foo|bar|[A-Z])$", "X" },
-- { "^(foo|bar|[A-Z])$", "XY" },
-- { "^(fo|foo)$", "fo" },
-- { "^(fo|foo)$", "foo" },
-- { "^^(fo|foo)$", "fo" },
-- { "^^(fo|foo)$", "foo" },
-- { "^$", "" },
-- { "^$", "x" },
-- { "^^$", "" },
-- { "^$$", "" },
-- { "^^$", "x" },
-- { "^$$", "x" },
-- { "^^$$", "" },
-- { "^^$$", "x" },
-- { "^^^^^^^^$$$$$$$$", "" },
-- { "^", "x" },
-- { "$", "x" },
--
-- // Word boundaries.
-- { "\\bfoo\\b", "nofoo foo that" },
-- { "a\\b", "faoa x" },
-- { "\\bbar", "bar x" },
-- { "\\bbar", "foo\nbar x" },
-- { "bar\\b", "foobar" },
-- { "bar\\b", "foobar\nxxx" },
-- { "(foo|bar|[A-Z])\\b", "foo" },
-- { "(foo|bar|[A-Z])\\b", "foo\n" },
-- { "\\b", "" },
-- { "\\b", "x" },
-- { "\\b(foo|bar|[A-Z])", "foo" },
-- { "\\b(foo|bar|[A-Z])\\b", "X" },
-- { "\\b(foo|bar|[A-Z])\\b", "XY" },
-- { "\\b(foo|bar|[A-Z])\\b", "bar" },
-- { "\\b(foo|bar|[A-Z])\\b", "foo" },
-- { "\\b(foo|bar|[A-Z])\\b", "foo\n" },
-- { "\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x" },
-- { "\\b(fo|foo)\\b", "fo" },
-- { "\\b(fo|foo)\\b", "foo" },
-- { "\\b\\b", "" },
-- { "\\b\\b", "x" },
-- { "\\b$", "" },
-- { "\\b$", "x" },
-- { "\\b$", "y x" },
-- { "\\b.$", "x" },
-- { "^\\b(fo|foo)\\b", "fo" },
-- { "^\\b(fo|foo)\\b", "foo" },
-- { "^\\b", "" },
-- { "^\\b", "x" },
-- { "^\\b\\b", "" },
-- { "^\\b\\b", "x" },
-- { "^\\b$", "" },
-- { "^\\b$", "x" },
-- { "^\\b.$", "x" },
-- { "^\\b.\\b$", "x" },
-- { "^^^^^^^^\\b$$$$$$$", "" },
-- { "^^^^^^^^\\b.$$$$$$", "x" },
-- { "^^^^^^^^\\b$$$$$$$", "x" },
--
-- // Non-word boundaries.
-- { "\\Bfoo\\B", "n foo xfoox that" },
-- { "a\\B", "faoa x" },
-- { "\\Bbar", "bar x" },
-- { "\\Bbar", "foo\nbar x" },
-- { "bar\\B", "foobar" },
-- { "bar\\B", "foobar\nxxx" },
-- { "(foo|bar|[A-Z])\\B", "foox" },
-- { "(foo|bar|[A-Z])\\B", "foo\n" },
-- { "\\B", "" },
-- { "\\B", "x" },
-- { "\\B(foo|bar|[A-Z])", "foo" },
-- { "\\B(foo|bar|[A-Z])\\B", "xXy" },
-- { "\\B(foo|bar|[A-Z])\\B", "XY" },
-- { "\\B(foo|bar|[A-Z])\\B", "XYZ" },
-- { "\\B(foo|bar|[A-Z])\\B", "abara" },
-- { "\\B(foo|bar|[A-Z])\\B", "xfoo_" },
-- { "\\B(foo|bar|[A-Z])\\B", "xfoo\n" },
-- { "\\B(foo|bar|[A-Z])\\B", "foo bar vNx" },
-- { "\\B(fo|foo)\\B", "xfoo" },
-- { "\\B(foo|fo)\\B", "xfooo" },
-- { "\\B\\B", "" },
-- { "\\B\\B", "x" },
-- { "\\B$", "" },
-- { "\\B$", "x" },
-- { "\\B$", "y x" },
-- { "\\B.$", "x" },
-- { "^\\B(fo|foo)\\B", "fo" },
-- { "^\\B(fo|foo)\\B", "foo" },
-- { "^\\B", "" },
-- { "^\\B", "x" },
-- { "^\\B\\B", "" },
-- { "^\\B\\B", "x" },
-- { "^\\B$", "" },
-- { "^\\B$", "x" },
-- { "^\\B.$", "x" },
-- { "^\\B.\\B$", "x" },
-- { "^^^^^^^^\\B$$$$$$$", "" },
-- { "^^^^^^^^\\B.$$$$$$", "x" },
-- { "^^^^^^^^\\B$$$$$$$", "x" },
--
-- // PCRE uses only ASCII for \b computation.
-- // All non-ASCII are *not* word characters.
-- { "\\bx\\b", "x" },
-- { "\\bx\\b", "x>" },
-- { "\\bx\\b", "<x" },
-- { "\\bx\\b", "<x>" },
-- { "\\bx\\b", "ax" },
-- { "\\bx\\b", "xb" },
-- { "\\bx\\b", "axb" },
-- { "\\bx\\b", "«x" },
-- { "\\bx\\b", "x»" },
-- { "\\bx\\b", "«x»" },
-- { "\\bx\\b", "axb" },
-- { "\\bx\\b", "áxβ" },
-- { "\\Bx\\B", "axb" },
-- { "\\Bx\\B", "áxβ" },
--
-- // Weird boundary cases.
-- { "^$^$", "" },
-- { "^$^", "" },
-- { "$^$", "" },
--
-- { "^$^$", "x" },
-- { "^$^", "x" },
-- { "$^$", "x" },
--
-- { "^$^$", "x\ny" },
-- { "^$^", "x\ny" },
-- { "$^$", "x\ny" },
--
-- { "^$^$", "x\n\ny" },
-- { "^$^", "x\n\ny" },
-- { "$^$", "x\n\ny" },
--
-- { "^(foo\\$)$", "foo$bar" },
-- { "(foo\\$)", "foo$bar" },
-- { "^...$", "abc" },
--
-- // UTF-8
-- { "^\xe6\x9c\xac$", "\xe6\x9c\xac" },
-- { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
-- { "^...$", ".\xe6\x9c\xac." },
--
-- { "^\\C\\C\\C$", "\xe6\x9c\xac" },
-- { "^\\C$", "\xe6\x9c\xac" },
-- { "^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
--
-- // Latin1
-- { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
-- { "^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
-- { "^...$", ".\xe6\x9c\xac." },
-- { "^.....$", ".\xe6\x9c\xac." },
--
-- // Perl v Posix
-- { "\\B(fo|foo)\\B", "xfooo" },
-- { "(fo|foo)", "foo" },
--
-- // Octal escapes.
-- { "\\141", "a" },
-- { "\\060", "0" },
-- { "\\0600", "00" },
-- { "\\608", "08" },
-- { "\\01", "\01" },
-- { "\\018", "\01" "8" },
--
-- // Hexadecimal escapes
-- { "\\x{61}", "a" },
-- { "\\x61", "a" },
-- { "\\x{00000061}", "a" },
--
-- // Unicode scripts.
-- { "\\p{Greek}+", "aαβb" },
-- { "\\P{Greek}+", "aαβb" },
-- { "\\p{^Greek}+", "aαβb" },
-- { "\\P{^Greek}+", "aαβb" },
--
-- // Unicode properties. Nd is decimal number. N is any number.
-- { "[^0-9]+", "abc123" },
-- { "\\p{Nd}+", "abc123²³¼½¾₀₉" },
-- { "\\p{^Nd}+", "abc123²³¼½¾₀₉" },
-- { "\\P{Nd}+", "abc123²³¼½¾₀₉" },
-- { "\\P{^Nd}+", "abc123²³¼½¾₀₉" },
-- { "\\pN+", "abc123²³¼½¾₀₉" },
-- { "\\p{N}+", "abc123²³¼½¾₀₉" },
-- { "\\p{^N}+", "abc123²³¼½¾₀₉" },
--
-- { "\\p{Any}+", "abc123" },
--
-- // Character classes & case folding.
-- { "(?i)[@-A]+", "@AaB" }, // matches @Aa but not B
-- { "(?i)[A-Z]+", "aAzZ" },
-- { "(?i)[^\\\\]+", "Aa\\" }, // \\ is between A-Z and a-z -
-- // splits the ranges in an interesting way.
--
-- // would like to use, but PCRE mishandles in full-match, non-greedy mode
-- // { "(?i)[\\\\]+", "Aa" },
--
-- { "(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
--
-- // Character classes & case folding.
-- { "[@-A]+", "@AaB" },
-- { "[A-Z]+", "aAzZ" },
-- { "[^\\\\]+", "Aa\\" },
-- { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
--
-- // Anchoring. (^abc in aabcdef was a former bug)
-- // The tester checks for a match in the text and
-- // subpieces of the text with a byte removed on either side.
-- { "^abc", "abcdef" },
-- { "^abc", "aabcdef" },
-- { "^[ay]*[bx]+c", "abcdef" },
-- { "^[ay]*[bx]+c", "aabcdef" },
-- { "def$", "abcdef" },
-- { "def$", "abcdeff" },
-- { "d[ex][fy]$", "abcdef" },
-- { "d[ex][fy]$", "abcdeff" },
-- { "[dz][ex][fy]$", "abcdef" },
-- { "[dz][ex][fy]$", "abcdeff" },
-- { "(?m)^abc", "abcdef" },
-- { "(?m)^abc", "aabcdef" },
-- { "(?m)^[ay]*[bx]+c", "abcdef" },
-- { "(?m)^[ay]*[bx]+c", "aabcdef" },
-- { "(?m)def$", "abcdef" },
-- { "(?m)def$", "abcdeff" },
-- { "(?m)d[ex][fy]$", "abcdef" },
-- { "(?m)d[ex][fy]$", "abcdeff" },
-- { "(?m)[dz][ex][fy]$", "abcdef" },
-- { "(?m)[dz][ex][fy]$", "abcdeff" },
-- { "^", "a" },
-- { "^^", "a" },
--
-- // Context.
-- // The tester checks for a match in the text and
-- // subpieces of the text with a byte removed on either side.
-- { "a", "a" },
-- { "ab*", "a" },
-- { "a\\C*", "a" },
--
-- // Former bugs.
-- { "a\\C*|ba\\C", "baba" },
--};
--
--TEST(Regexp, SearchTests) {
-- int failures = 0;
-- for (int i = 0; i < arraysize(simple_tests); i++) {
-- const RegexpTest& t = simple_tests[i];
-- if (!TestRegexpOnText(t.regexp, t.text))
-- failures++;
--
--#ifdef LOGGING
-- // Build a dummy ExhaustiveTest call that will trigger just
-- // this one test, so that we log the test case.
-- vector<string> atom, alpha, ops;
-- atom.push_back(StringPiece(t.regexp).as_string());
-- alpha.push_back(StringPiece(t.text).as_string());
-- ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
--#endif
--
-- }
-- EXPECT_EQ(failures, 0);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/set_test.cc b/re2/re2/testing/set_test.cc
-deleted file mode 100644
-index 89aed80..0000000
---- a/re2/re2/testing/set_test.cc
-+++ /dev/null
-@@ -1,102 +0,0 @@
--// Copyright 2010 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <sys/types.h>
--#include <sys/stat.h>
--#include <vector>
--
--#include "util/test.h"
--#include "re2/re2.h"
--#include "re2/set.h"
--
--namespace re2 {
--
--TEST(Set, Unanchored) {
-- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
--
-- CHECK_EQ(s.Add("foo", NULL), 0);
-- CHECK_EQ(s.Add("(", NULL), -1);
-- CHECK_EQ(s.Add("bar", NULL), 1);
--
-- CHECK_EQ(s.Compile(), true);
--
-- vector<int> v;
-- CHECK_EQ(s.Match("foobar", &v), true);
-- CHECK_EQ(v.size(), 2);
-- CHECK_EQ(v[0], 0);
-- CHECK_EQ(v[1], 1);
--
-- v.clear();
-- CHECK_EQ(s.Match("fooba", &v), true);
-- CHECK_EQ(v.size(), 1);
-- CHECK_EQ(v[0], 0);
--
-- v.clear();
-- CHECK_EQ(s.Match("oobar", &v), true);
-- CHECK_EQ(v.size(), 1);
-- CHECK_EQ(v[0], 1);
--}
--
--TEST(Set, UnanchoredFactored) {
-- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
--
-- CHECK_EQ(s.Add("foo", NULL), 0);
-- CHECK_EQ(s.Add("(", NULL), -1);
-- CHECK_EQ(s.Add("foobar", NULL), 1);
--
-- CHECK_EQ(s.Compile(), true);
--
-- vector<int> v;
-- CHECK_EQ(s.Match("foobar", &v), true);
-- CHECK_EQ(v.size(), 2);
-- CHECK_EQ(v[0], 0);
-- CHECK_EQ(v[1], 1);
--
-- v.clear();
-- CHECK_EQ(s.Match("obarfoobaroo", &v), true);
-- CHECK_EQ(v.size(), 2);
-- CHECK_EQ(v[0], 0);
-- CHECK_EQ(v[1], 1);
--
-- v.clear();
-- CHECK_EQ(s.Match("fooba", &v), true);
-- CHECK_EQ(v.size(), 1);
-- CHECK_EQ(v[0], 0);
--
-- v.clear();
-- CHECK_EQ(s.Match("oobar", &v), false);
-- CHECK_EQ(v.size(), 0);
--}
--
--TEST(Set, Anchored) {
-- RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
--
-- CHECK_EQ(s.Add("foo", NULL), 0);
-- CHECK_EQ(s.Add("(", NULL), -1);
-- CHECK_EQ(s.Add("bar", NULL), 1);
--
-- CHECK_EQ(s.Compile(), true);
--
-- vector<int> v;
-- CHECK_EQ(s.Match("foobar", &v), false);
-- CHECK_EQ(v.size(), 0);
--
-- CHECK_EQ(s.Match("fooba", &v), false);
-- CHECK_EQ(v.size(), 0);
--
-- CHECK_EQ(s.Match("oobar", &v), false);
-- CHECK_EQ(v.size(), 0);
--
-- CHECK_EQ(s.Match("foo", &v), true);
-- CHECK_EQ(v.size(), 1);
-- CHECK_EQ(v[0], 0);
--
-- CHECK_EQ(s.Match("bar", &v), true);
-- CHECK_EQ(v.size(), 1);
-- CHECK_EQ(v[0], 1);
--
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/simplify_test.cc b/re2/re2/testing/simplify_test.cc
-deleted file mode 100644
-index d54837c..0000000
---- a/re2/re2/testing/simplify_test.cc
-+++ /dev/null
-@@ -1,167 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test simplify.cc.
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/regexp.h"
--
--namespace re2 {
--
--struct Test {
-- const char* regexp;
-- const char* simplified;
--};
--
--static Test tests[] = {
-- // Already-simple constructs
-- { "a", "a" },
-- { "ab", "ab" },
-- { "a|b", "[a-b]" },
-- { "ab|cd", "ab|cd" },
-- { "(ab)*", "(ab)*" },
-- { "(ab)+", "(ab)+" },
-- { "(ab)?", "(ab)?" },
-- { ".", "." },
-- { "^", "^" },
-- { "$", "$" },
-- { "[ac]", "[ac]" },
-- { "[^ac]", "[^ac]" },
--
-- // Posix character classes
-- { "[[:alnum:]]", "[0-9A-Za-z]" },
-- { "[[:alpha:]]", "[A-Za-z]" },
-- { "[[:blank:]]", "[\\t ]" },
-- { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
-- { "[[:digit:]]", "[0-9]" },
-- { "[[:graph:]]", "[!-~]" },
-- { "[[:lower:]]", "[a-z]" },
-- { "[[:print:]]", "[ -~]" },
-- { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
-- { "[[:space:]]" , "[\\t-\\r ]" },
-- { "[[:upper:]]", "[A-Z]" },
-- { "[[:xdigit:]]", "[0-9A-Fa-f]" },
--
-- // Perl character classes
-- { "\\d", "[0-9]" },
-- { "\\s", "[\\t-\\n\\f-\\r ]" },
-- { "\\w", "[0-9A-Z_a-z]" },
-- { "\\D", "[^0-9]" },
-- { "\\S", "[^\\t-\\n\\f-\\r ]" },
-- { "\\W", "[^0-9A-Z_a-z]" },
-- { "[\\d]", "[0-9]" },
-- { "[\\s]", "[\\t-\\n\\f-\\r ]" },
-- { "[\\w]", "[0-9A-Z_a-z]" },
-- { "[\\D]", "[^0-9]" },
-- { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
-- { "[\\W]", "[^0-9A-Z_a-z]" },
--
-- // Posix repetitions
-- { "a{1}", "a" },
-- { "a{2}", "aa" },
-- { "a{5}", "aaaaa" },
-- { "a{0,1}", "a?" },
-- // The next three are illegible because Simplify inserts (?:)
-- // parens instead of () parens to avoid creating extra
-- // captured subexpressions. The comments show a version fewer parens.
-- { "(a){0,2}", "(?:(a)(a)?)?" }, // (aa?)?
-- { "(a){0,4}", "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // (a(a(aa?)?)?)?
-- { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // aa(a(a(aa?)?)?)?
-- { "a{0,2}", "(?:aa?)?" }, // (aa?)?
-- { "a{0,4}", "(?:a(?:a(?:aa?)?)?)?" }, // (a(a(aa?)?)?)?
-- { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" }, // aa(a(a(aa?)?)?)?
-- { "a{0,}", "a*" },
-- { "a{1,}", "a+" },
-- { "a{2,}", "aa+" },
-- { "a{5,}", "aaaaa+" },
--
-- // Test that operators simplify their arguments.
-- // (Simplify used to not simplify arguments to a {} repeat.)
-- { "(?:a{1,}){1,}", "a+" },
-- { "(a{1,}b{1,})", "(a+b+)" },
-- { "a{1,}|b{1,}", "a+|b+" },
-- { "(?:a{1,})*", "(?:a+)*" },
-- { "(?:a{1,})+", "a+" },
-- { "(?:a{1,})?", "(?:a+)?" },
-- { "a{0}", "" },
--
-- // Character class simplification
-- { "[ab]", "[a-b]" },
-- { "[a-za-za-z]", "[a-z]" },
-- { "[A-Za-zA-Za-z]", "[A-Za-z]" },
-- { "[ABCDEFGH]", "[A-H]" },
-- { "[AB-CD-EF-GH]", "[A-H]" },
-- { "[W-ZP-XE-R]", "[E-Z]" },
-- { "[a-ee-gg-m]", "[a-m]" },
-- { "[a-ea-ha-m]", "[a-m]" },
-- { "[a-ma-ha-e]", "[a-m]" },
-- { "[a-zA-Z0-9 -~]", "[ -~]" },
--
-- // Empty character classes
-- { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },
--
-- // Full character classes
-- { "[[:cntrl:][:^cntrl:]]", "." },
--
-- // Unicode case folding.
-- { "(?i)A", "[Aa]" },
-- { "(?i)a", "[Aa]" },
-- { "(?i)K", "[Kk\\x{212a}]" },
-- { "(?i)k", "[Kk\\x{212a}]" },
-- { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
-- { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
-- { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
-- { "(?i)[\\x00-\\x{10ffff}]", "." },
--
-- // Empty string as a regular expression.
-- // Empty string must be preserved inside parens in order
-- // to make submatches work right, so these are less
-- // interesting than they used to be. ToString inserts
-- // explicit (?:) in place of non-parenthesized empty strings,
-- // to make them easier to spot for other parsers.
-- { "(a|b|)", "([a-b]|(?:))" },
-- { "(|)", "()" },
-- { "a()", "a()" },
-- { "(()|())", "(()|())" },
-- { "(a|)", "(a|(?:))" },
-- { "ab()cd()", "ab()cd()" },
-- { "()", "()" },
-- { "()*", "()*" },
-- { "()+", "()+" },
-- { "()?" , "()?" },
-- { "(){0}", "" },
-- { "(){1}", "()" },
-- { "(){1,}", "()+" },
-- { "(){0,2}", "(?:()()?)?" },
--};
--
--TEST(TestSimplify, SimpleRegexps) {
-- for (int i = 0; i < arraysize(tests); i++) {
-- RegexpStatus status;
-- VLOG(1) << "Testing " << tests[i].regexp;
-- Regexp* re = Regexp::Parse(tests[i].regexp,
-- Regexp::MatchNL | (Regexp::LikePerl &
-- ~Regexp::OneLine),
-- &status);
-- CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text();
-- Regexp* sre = re->Simplify();
-- CHECK(sre != NULL);
--
-- // Check that already-simple regexps don't allocate new ones.
-- if (strcmp(tests[i].regexp, tests[i].simplified) == 0) {
-- CHECK(re == sre) << " " << tests[i].regexp
-- << " " << re->ToString() << " " << sre->ToString();
-- }
--
-- EXPECT_EQ(tests[i].simplified, sre->ToString())
-- << " " << tests[i].regexp << " " << sre->Dump();
--
-- re->Decref();
-- sre->Decref();
-- }
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/string_generator.cc b/re2/re2/testing/string_generator.cc
-deleted file mode 100644
-index 5be6d3e..0000000
---- a/re2/re2/testing/string_generator.cc
-+++ /dev/null
-@@ -1,113 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// String generator: generates all possible strings of up to
--// maxlen letters using the set of letters in alpha.
--// Fetch strings using a Java-like Next()/HasNext() interface.
--
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/testing/string_generator.h"
--
--namespace re2 {
--
--StringGenerator::StringGenerator(int maxlen, const vector<string>& alphabet)
-- : maxlen_(maxlen), alphabet_(alphabet),
-- generate_null_(false),
-- random_(false), nrandom_(0), acm_(NULL) {
--
-- // Degenerate case: no letters, no non-empty strings.
-- if (alphabet_.size() == 0)
-- maxlen_ = 0;
--
-- // Next() will return empty string (digits_ is empty).
-- hasnext_ = true;
--}
--
--StringGenerator::~StringGenerator() {
-- delete acm_;
--}
--
--// Resets the string generator state to the beginning.
--void StringGenerator::Reset() {
-- digits_.clear();
-- hasnext_ = true;
-- random_ = false;
-- nrandom_ = 0;
-- generate_null_ = false;
--}
--
--// Increments the big number in digits_, returning true if successful.
--// Returns false if all the numbers have been used.
--bool StringGenerator::IncrementDigits() {
-- // First try to increment the current number.
-- for (int i = digits_.size() - 1; i >= 0; i--) {
-- if (++digits_[i] < alphabet_.size())
-- return true;
-- digits_[i] = 0;
-- }
--
-- // If that failed, make a longer number.
-- if (digits_.size() < maxlen_) {
-- digits_.push_back(0);
-- return true;
-- }
--
-- return false;
--}
--
--// Generates random digits_, return true if successful.
--// Returns false if the random sequence is over.
--bool StringGenerator::RandomDigits() {
-- if (--nrandom_ <= 0)
-- return false;
--
-- // Pick length.
-- int len = acm_->Uniform(maxlen_+1);
-- digits_.resize(len);
-- for (int i = 0; i < len; i++)
-- digits_[i] = acm_->Uniform(alphabet_.size());
-- return true;
--}
--
--// Returns the next string in the iteration, which is the one
--// currently described by digits_. Calls IncrementDigits
--// after computing the string, so that it knows the answer
--// for subsequent HasNext() calls.
--const StringPiece& StringGenerator::Next() {
-- CHECK(hasnext_);
-- if (generate_null_) {
-- generate_null_ = false;
-- sp_ = NULL;
-- return sp_;
-- }
-- s_.clear();
-- for (int i = 0; i < digits_.size(); i++) {
-- s_ += alphabet_[digits_[i]];
-- }
-- hasnext_ = random_ ? RandomDigits() : IncrementDigits();
-- sp_ = s_;
-- return sp_;
--}
--
--// Sets generator up to return n random strings.
--void StringGenerator::Random(int32 seed, int n) {
-- if (acm_ == NULL)
-- acm_ = new ACMRandom(seed);
-- else
-- acm_->Reset(seed);
--
-- random_ = true;
-- nrandom_ = n;
-- hasnext_ = nrandom_ > 0;
--}
--
--void StringGenerator::GenerateNULL() {
-- generate_null_ = true;
-- hasnext_ = true;
--}
--
--} // namespace re2
--
-diff --git a/re2/re2/testing/string_generator.h b/re2/re2/testing/string_generator.h
-deleted file mode 100644
-index 6a9ef42..0000000
---- a/re2/re2/testing/string_generator.h
-+++ /dev/null
-@@ -1,58 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// String generator: generates all possible strings of up to
--// maxlen letters using the set of letters in alpha.
--// Fetch strings using a Java-like Next()/HasNext() interface.
--
--#ifndef RE2_TESTING_STRING_GENERATOR_H__
--#define RE2_TESTING_STRING_GENERATOR_H__
--
--#include <string>
--#include <vector>
--#include "util/util.h"
--#include "util/random.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--class StringGenerator {
-- public:
-- StringGenerator(int maxlen, const vector<string>& alphabet);
-- ~StringGenerator();
-- const StringPiece& Next();
-- bool HasNext() { return hasnext_; }
--
-- // Resets generator to start sequence over.
-- void Reset();
--
-- // Causes generator to emit random strings for next n calls to Next().
-- void Random(int32 seed, int n);
--
-- // Causes generator to emit a NULL as the next call.
-- void GenerateNULL();
--
-- private:
-- bool IncrementDigits();
-- bool RandomDigits();
--
-- // Global state.
-- int maxlen_; // Maximum length string to generate.
-- vector<string> alphabet_; // Alphabet, one string per letter.
--
-- // Iteration state.
-- StringPiece sp_; // Last StringPiece returned by Next().
-- string s_; // String data in last StringPiece returned by Next().
-- bool hasnext_; // Whether Next() can be called again.
-- vector<int> digits_; // Alphabet indices for next string.
-- bool generate_null_; // Whether to generate a NULL StringPiece next.
-- bool random_; // Whether generated strings are random.
-- int nrandom_; // Number of random strings left to generate.
-- ACMRandom* acm_; // Random number generator
-- DISALLOW_EVIL_CONSTRUCTORS(StringGenerator);
--};
--
--} // namespace re2
--
--#endif // RE2_TESTING_STRING_GENERATOR_H__
-diff --git a/re2/re2/testing/string_generator_test.cc b/re2/re2/testing/string_generator_test.cc
-deleted file mode 100644
-index d13401a..0000000
---- a/re2/re2/testing/string_generator_test.cc
-+++ /dev/null
-@@ -1,109 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Test StringGenerator.
--
--#include <stdlib.h>
--#include <string>
--#include <vector>
--#include "util/test.h"
--#include "re2/testing/string_generator.h"
--#include "re2/testing/regexp_generator.h"
--
--namespace re2 {
--
--// Returns i to the e.
--static int64 IntegerPower(int i, int e) {
-- int64 p = 1;
-- while (e-- > 0)
-- p *= i;
-- return p;
--}
--
--// Checks that for given settings of the string generator:
--// * it generates strings that are non-decreasing in length.
--// * strings of the same length are sorted in alphabet order.
--// * it doesn't generate the same string twice.
--// * it generates the right number of strings.
--//
--// If all of these hold, the StringGenerator is behaving.
--// Assumes that the alphabet is sorted, so that the generated
--// strings can just be compared lexicographically.
--static void RunTest(int len, string alphabet, bool donull) {
-- StringGenerator g(len, Explode(alphabet));
--
-- int n = 0;
-- int last_l = -1;
-- string last_s;
--
-- if (donull) {
-- g.GenerateNULL();
-- EXPECT_TRUE(g.HasNext());
-- StringPiece sp = g.Next();
-- EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
-- EXPECT_EQ(sp.size(), 0);
-- }
--
-- while (g.HasNext()) {
-- string s = g.Next().as_string();
-- n++;
--
-- // Check that all characters in s appear in alphabet.
-- for (const char *p = s.c_str(); *p != '\0'; ) {
-- Rune r;
-- p += chartorune(&r, p);
-- EXPECT_TRUE(utfrune(alphabet.c_str(), r) != NULL);
-- }
--
-- // Check that string is properly ordered w.r.t. previous string.
-- int l = utflen(s.c_str());
-- EXPECT_LE(l, len);
-- if (last_l < l) {
-- last_l = l;
-- } else {
-- EXPECT_EQ(last_l, l);
-- EXPECT_LT(last_s, s);
-- }
-- last_s = s;
-- }
--
-- // Check total string count.
-- int64 m = 0;
-- int alpha = utflen(alphabet.c_str());
-- if (alpha == 0) // Degenerate case.
-- len = 0;
-- for (int i = 0; i <= len; i++)
-- m += IntegerPower(alpha, i);
-- EXPECT_EQ(n, m);
--}
--
--TEST(StringGenerator, NoLength) {
-- RunTest(0, "abc", false);
--}
--
--TEST(StringGenerator, NoLengthNoAlphabet) {
-- RunTest(0, "", false);
--}
--
--TEST(StringGenerator, NoAlphabet) {
-- RunTest(5, "", false);
--}
--
--TEST(StringGenerator, Simple) {
-- RunTest(3, "abc", false);
--}
--
--TEST(StringGenerator, UTF8) {
-- RunTest(4, "abc\xE2\x98\xBA", false);
--}
--
--TEST(StringGenerator, GenNULL) {
-- RunTest(0, "abc", true);
-- RunTest(0, "", true);
-- RunTest(5, "", true);
-- RunTest(3, "abc", true);
-- RunTest(4, "abc\xE2\x98\xBA", true);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/tester.cc b/re2/re2/testing/tester.cc
-deleted file mode 100644
-index 003dc5a..0000000
---- a/re2/re2/testing/tester.cc
-+++ /dev/null
-@@ -1,640 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Regular expression engine tester -- test all the implementations against each other.
--
--#include "util/util.h"
--#include "util/flags.h"
--#include "re2/testing/tester.h"
--#include "re2/prog.h"
--#include "re2/re2.h"
--#include "re2/regexp.h"
--
--DEFINE_bool(dump_prog, false, "dump regexp program");
--DEFINE_bool(log_okay, false, "log successful runs");
--DEFINE_bool(dump_rprog, false, "dump reversed regexp program");
--
--DEFINE_int32(max_regexp_failures, 100,
-- "maximum number of regexp test failures (-1 = unlimited)");
--
--DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
--
--namespace re2 {
--
--enum {
-- kMaxSubmatch = 1+16, // $0...$16
--};
--
--const char* engine_types[kEngineMax] = {
-- "Backtrack",
-- "NFA",
-- "DFA",
-- "DFA1",
-- "OnePass",
-- "BitState",
-- "RE2",
-- "RE2a",
-- "RE2b",
-- "PCRE",
--};
--
--// Returns the name string for the type t.
--static string EngineString(Engine t) {
-- if (t < 0 || t >= arraysize(engine_types) || engine_types[t] == NULL) {
-- return StringPrintf("type%d", static_cast<int>(t));
-- }
-- return engine_types[t];
--}
--
--// Returns bit mask of engines to use.
--static uint32 Engines() {
-- static uint32 cached_engines;
-- static bool did_parse;
--
-- if (did_parse)
-- return cached_engines;
--
-- if (FLAGS_regexp_engines.empty()) {
-- cached_engines = ~0;
-- } else {
-- for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
-- if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str()))
-- cached_engines |= 1<<i;
-- }
--
-- if (cached_engines == 0)
-- LOG(INFO) << "Warning: no engines enabled.";
-- if (!UsingPCRE)
-- cached_engines &= ~(1<<kEnginePCRE);
-- for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
-- if (cached_engines & (1<<i))
-- LOG(INFO) << EngineString(i) << " enabled";
-- }
-- did_parse = true;
-- return cached_engines;
--}
--
--// The result of running a match.
--struct TestInstance::Result {
-- bool skipped; // test skipped: wasn't applicable
-- bool matched; // found a match
-- bool untrusted; // don't really trust the answer
-- bool have_submatch; // computed all submatch info
-- bool have_submatch0; // computed just submatch[0]
-- StringPiece submatch[kMaxSubmatch];
--};
--
--typedef TestInstance::Result Result;
--
--// Formats a single capture range s in text in the form (a,b)
--// where a and b are the starting and ending offsets of s in text.
--static string FormatCapture(const StringPiece& text, const StringPiece& s) {
-- if (s.begin() == NULL)
-- return "(?,?)";
-- return StringPrintf("(%d,%d)",
-- static_cast<int>(s.begin() - text.begin()),
-- static_cast<int>(s.end() - text.begin()));
--}
--
--// Returns whether text contains non-ASCII (>= 0x80) bytes.
--static bool NonASCII(const StringPiece& text) {
-- for (int i = 0; i < text.size(); i++)
-- if ((uint8)text[i] >= 0x80)
-- return true;
-- return false;
--}
--
--// Returns string representation of match kind.
--static string FormatKind(Prog::MatchKind kind) {
-- switch (kind) {
-- case Prog::kFullMatch:
-- return "full match";
-- case Prog::kLongestMatch:
-- return "longest match";
-- case Prog::kFirstMatch:
-- return "first match";
-- case Prog::kManyMatch:
-- return "many match";
-- }
-- return "???";
--}
--
--// Returns string representation of anchor kind.
--static string FormatAnchor(Prog::Anchor anchor) {
-- switch (anchor) {
-- case Prog::kAnchored:
-- return "anchored";
-- case Prog::kUnanchored:
-- return "unanchored";
-- }
-- return "???";
--}
--
--struct ParseMode {
-- Regexp::ParseFlags parse_flags;
-- string desc;
--};
--
--static const Regexp::ParseFlags single_line =
-- Regexp::LikePerl;
--static const Regexp::ParseFlags multi_line =
-- static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);
--
--static ParseMode parse_modes[] = {
-- { single_line, "single-line" },
-- { single_line|Regexp::Latin1, "single-line, latin1" },
-- { multi_line, "multiline" },
-- { multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
-- { multi_line|Regexp::Latin1, "multiline, latin1" },
--};
--
--static string FormatMode(Regexp::ParseFlags flags) {
-- for (int i = 0; i < arraysize(parse_modes); i++)
-- if (parse_modes[i].parse_flags == flags)
-- return parse_modes[i].desc;
-- return StringPrintf("%#x", static_cast<uint>(flags));
--}
--
--// Constructs and saves all the matching engines that
--// will be required for the given tests.
--TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
-- Regexp::ParseFlags flags)
-- : regexp_str_(regexp_str),
-- kind_(kind),
-- flags_(flags),
-- error_(false),
-- regexp_(NULL),
-- num_captures_(0),
-- prog_(NULL),
-- rprog_(NULL),
-- re_(NULL),
-- re2_(NULL) {
--
-- VLOG(1) << CEscape(regexp_str);
--
-- // Compile regexp to prog.
-- // Always required - needed for backtracking (reference implementation).
-- RegexpStatus status;
-- regexp_ = Regexp::Parse(regexp_str, flags, &status);
-- if (regexp_ == NULL) {
-- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
-- << " mode: " << FormatMode(flags);
-- error_ = true;
-- return;
-- }
-- num_captures_ = regexp_->NumCaptures();
-- prog_ = regexp_->CompileToProg(0);
-- if (prog_ == NULL) {
-- LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
-- error_ = true;
-- return;
-- }
-- if (FLAGS_dump_prog) {
-- LOG(INFO) << "Prog for "
-- << " regexp "
-- << CEscape(regexp_str_)
-- << " (" << FormatKind(kind_)
-- << ", " << FormatMode(flags_)
-- << ")\n"
-- << prog_->Dump();
-- }
--
-- // Compile regexp to reversed prog. Only needed for DFA engines.
-- if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
-- rprog_ = regexp_->CompileToReverseProg(0);
-- if (rprog_ == NULL) {
-- LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
-- error_ = true;
-- return;
-- }
-- if (FLAGS_dump_rprog)
-- LOG(INFO) << rprog_->Dump();
-- }
--
-- // Create re string that will be used for RE and RE2.
-- string re = regexp_str.as_string();
-- // Accomodate flags.
-- // Regexp::Latin1 will be accomodated below.
-- if (!(flags & Regexp::OneLine))
-- re = "(?m)" + re;
-- if (flags & Regexp::NonGreedy)
-- re = "(?U)" + re;
-- if (flags & Regexp::DotNL)
-- re = "(?s)" + re;
--
-- // Compile regexp to RE2.
-- if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
-- RE2::Options options;
-- if (flags & Regexp::Latin1)
-- options.set_encoding(RE2::Options::EncodingLatin1);
-- if (kind_ == Prog::kLongestMatch)
-- options.set_longest_match(true);
-- re2_ = new RE2(re, options);
-- if (!re2_->error().empty()) {
-- LOG(INFO) << "Cannot RE2: " << CEscape(re);
-- error_ = true;
-- return;
-- }
-- }
--
-- // Compile regexp to RE.
-- // PCRE as exposed by the RE interface isn't always usable.
-- // 1. It disagrees about handling of empty-string reptitions
-- // like matching (a*)* against "b". PCRE treats the (a*) as
-- // occurring once, while we treat it as occurring not at all.
-- // 2. It treats $ as this weird thing meaning end of string
-- // or before the \n at the end of the string.
-- // 3. It doesn't implement POSIX leftmost-longest matching.
-- // MimicsPCRE() detects 1 and 2.
-- if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
-- kind_ != Prog::kLongestMatch) {
-- PCRE_Options o;
-- o.set_option(PCRE::UTF8);
-- if (flags & Regexp::Latin1)
-- o.set_option(PCRE::None);
-- // PCRE has interface bug keeping us from finding $0, so
-- // add one more layer of parens.
-- re_ = new PCRE("("+re+")", o);
-- if (!re_->error().empty()) {
-- LOG(INFO) << "Cannot PCRE: " << CEscape(re);
-- error_ = true;
-- return;
-- }
-- }
--}
--
--TestInstance::~TestInstance() {
-- if (regexp_)
-- regexp_->Decref();
-- delete prog_;
-- delete rprog_;
-- delete re_;
-- delete re2_;
--}
--
--// Runs a single search using the named engine type.
--// This interface hides all the irregularities of the various
--// engine interfaces from the rest of this file.
--void TestInstance::RunSearch(Engine type,
-- const StringPiece& orig_text,
-- const StringPiece& orig_context,
-- Prog::Anchor anchor,
-- Result *result) {
-- memset(result, 0, sizeof *result);
-- if (regexp_ == NULL) {
-- result->skipped = true;
-- return;
-- }
-- int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0
-- if (nsubmatch > kMaxSubmatch)
-- nsubmatch = kMaxSubmatch;
--
-- StringPiece text = orig_text;
-- StringPiece context = orig_context;
--
-- switch (type) {
-- default:
-- LOG(FATAL) << "Bad RunSearch type: " << (int)type;
--
-- case kEngineBacktrack:
-- if (prog_ == NULL) {
-- result->skipped = true;
-- break;
-- }
-- result->matched =
-- prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
-- result->submatch, nsubmatch);
-- result->have_submatch = true;
-- break;
--
-- case kEngineNFA:
-- if (prog_ == NULL) {
-- result->skipped = true;
-- break;
-- }
-- result->matched =
-- prog_->SearchNFA(text, context, anchor, kind_,
-- result->submatch, nsubmatch);
-- result->have_submatch = true;
-- break;
--
-- case kEngineDFA:
-- if (prog_ == NULL) {
-- result->skipped = true;
-- break;
-- }
-- result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
-- &result->skipped, NULL);
-- break;
--
-- case kEngineDFA1:
-- if (prog_ == NULL || rprog_ == NULL) {
-- result->skipped = true;
-- break;
-- }
-- result->matched =
-- prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
-- &result->skipped, NULL);
-- // If anchored, no need for second run,
-- // but do it anyway to find more bugs.
-- if (result->matched) {
-- if (!rprog_->SearchDFA(result->submatch[0], context,
-- Prog::kAnchored, Prog::kLongestMatch,
-- result->submatch,
-- &result->skipped, NULL)) {
-- LOG(ERROR) << "Reverse DFA inconsistency: " << CEscape(regexp_str_)
-- << " on " << CEscape(text);
-- result->matched = false;
-- }
-- }
-- result->have_submatch0 = true;
-- break;
--
-- case kEngineOnePass:
-- if (prog_ == NULL ||
-- anchor == Prog::kUnanchored ||
-- !prog_->IsOnePass() ||
-- nsubmatch > Prog::kMaxOnePassCapture) {
-- result->skipped = true;
-- break;
-- }
-- result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
-- result->submatch, nsubmatch);
-- result->have_submatch = true;
-- break;
--
-- case kEngineBitState:
-- if (prog_ == NULL) {
-- result->skipped = true;
-- break;
-- }
-- result->matched = prog_->SearchBitState(text, context, anchor, kind_,
-- result->submatch, nsubmatch);
-- result->have_submatch = true;
-- break;
--
-- case kEngineRE2:
-- case kEngineRE2a:
-- case kEngineRE2b: {
-- if (!re2_ || text.end() != context.end()) {
-- result->skipped = true;
-- break;
-- }
--
-- RE2::Anchor re_anchor;
-- if (anchor == Prog::kAnchored)
-- re_anchor = RE2::ANCHOR_START;
-- else
-- re_anchor = RE2::UNANCHORED;
-- if (kind_ == Prog::kFullMatch)
-- re_anchor = RE2::ANCHOR_BOTH;
--
-- result->matched = re2_->Match(context,
-- text.begin() - context.begin(),
-- text.end() - context.begin(),
-- re_anchor, result->submatch, nsubmatch);
-- result->have_submatch = nsubmatch > 0;
-- break;
-- }
--
-- case kEnginePCRE: {
-- if (!re_ || text.begin() != context.begin() ||
-- text.end() != context.end()) {
-- result->skipped = true;
-- break;
-- }
--
-- const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
-- PCRE::Arg *a = new PCRE::Arg[nsubmatch];
-- for (int i = 0; i < nsubmatch; i++) {
-- a[i] = PCRE::Arg(&result->submatch[i]);
-- argptr[i] = &a[i];
-- }
-- int consumed;
-- PCRE::Anchor pcre_anchor;
-- if (anchor == Prog::kAnchored)
-- pcre_anchor = PCRE::ANCHOR_START;
-- else
-- pcre_anchor = PCRE::UNANCHORED;
-- if (kind_ == Prog::kFullMatch)
-- pcre_anchor = PCRE::ANCHOR_BOTH;
-- re_->ClearHitLimit();
-- result->matched =
-- re_->DoMatch(text,
-- pcre_anchor,
-- &consumed,
-- argptr, nsubmatch);
-- if (re_->HitLimit()) {
-- result->untrusted = true;
-- delete[] argptr;
-- delete[] a;
-- break;
-- }
-- result->have_submatch = true;
--
-- // Work around RE interface bug: PCRE returns -1 as the
-- // offsets for an unmatched subexpression, and RE should
-- // turn that into StringPiece(NULL) but in fact it uses
-- // StringPiece(text.begin() - 1, 0). Oops.
-- for (int i = 0; i < nsubmatch; i++)
-- if (result->submatch[i].begin() == text.begin() - 1)
-- result->submatch[i] = NULL;
-- delete[] argptr;
-- delete[] a;
-- break;
-- }
-- }
--
-- if (!result->matched)
-- memset(result->submatch, 0, sizeof result->submatch);
--}
--
--// Checks whether r is okay given that correct is the right answer.
--// Specifically, r's answers have to match (but it doesn't have to
--// claim to have all the answers).
--static bool ResultOkay(const Result& r, const Result& correct) {
-- if (r.skipped)
-- return true;
-- if (r.matched != correct.matched)
-- return false;
-- if (r.have_submatch || r.have_submatch0) {
-- for (int i = 0; i < kMaxSubmatch; i++) {
-- if (correct.submatch[i].begin() != r.submatch[i].begin() ||
-- correct.submatch[i].size() != r.submatch[i].size())
-- return false;
-- if (!r.have_submatch)
-- break;
-- }
-- }
-- return true;
--}
--
--// Runs a single test.
--bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor) {
-- // Backtracking is the gold standard.
-- Result correct;
-- RunSearch(kEngineBacktrack, text, context, anchor, &correct);
-- if (correct.skipped) {
-- if (regexp_ == NULL)
-- return true;
-- LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
-- << " " << FormatMode(flags_);
-- return false;
-- }
-- VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
-- << " text " << CEscape(text)
-- << " (" << FormatKind(kind_)
-- << ", " << FormatAnchor(anchor)
-- << ", " << FormatMode(flags_)
-- << ")";
--
-- // Compare the others.
-- bool all_okay = true;
-- for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
-- if (!(Engines() & (1<<i)))
-- continue;
--
-- Result r;
-- RunSearch(i, text, context, anchor, &r);
-- if (ResultOkay(r, correct)) {
-- if (FLAGS_log_okay)
-- LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
-- continue;
-- }
--
-- // We disagree with PCRE on the meaning of some Unicode matches.
-- // In particular, we treat all non-ASCII UTF-8 as word characters.
-- // We also treat "empty" character sets like [^\w\W] as being
-- // impossible to match, while PCRE apparently excludes some code
-- // points (e.g., 0x0080) from both \w and \W.
-- if (i == kEnginePCRE && NonASCII(text))
-- continue;
--
-- if (!r.untrusted)
-- all_okay = false;
--
-- LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
-- context, anchor);
-- if (r.matched != correct.matched) {
-- if (r.matched) {
-- LOG(INFO) << " Should not match (but does).";
-- } else {
-- LOG(INFO) << " Should match (but does not).";
-- continue;
-- }
-- }
-- for (int i = 0; i < 1+num_captures_; i++) {
-- if (r.submatch[i].begin() != correct.submatch[i].begin() ||
-- r.submatch[i].end() != correct.submatch[i].end()) {
-- LOG(INFO) <<
-- StringPrintf(" $%d: should be %s is %s",
-- i,
-- FormatCapture(text, correct.submatch[i]).c_str(),
-- FormatCapture(text, r.submatch[i]).c_str());
-- } else {
-- LOG(INFO) <<
-- StringPrintf(" $%d: %s ok", i,
-- FormatCapture(text, r.submatch[i]).c_str());
-- }
-- }
-- }
--
-- if (!all_okay) {
-- if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
-- LOG(QFATAL) << "Too many regexp failures.";
-- }
--
-- return all_okay;
--}
--
--void TestInstance::LogMatch(const char* prefix, Engine e,
-- const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor) {
-- LOG(INFO) << prefix
-- << EngineString(e)
-- << " regexp "
-- << CEscape(regexp_str_)
-- << " "
-- << CEscape(regexp_->ToString())
-- << " text "
-- << CEscape(text)
-- << " ("
-- << text.begin() - context.begin()
-- << ","
-- << text.end() - context.begin()
-- << ") of context "
-- << CEscape(context)
-- << " (" << FormatKind(kind_)
-- << ", " << FormatAnchor(anchor)
-- << ", " << FormatMode(flags_)
-- << ")";
--}
--
--static Prog::MatchKind kinds[] = {
-- Prog::kFirstMatch,
-- Prog::kLongestMatch,
-- Prog::kFullMatch,
--};
--
--// Test all possible match kinds and parse modes.
--Tester::Tester(const StringPiece& regexp) {
-- error_ = false;
-- for (int i = 0; i < arraysize(kinds); i++) {
-- for (int j = 0; j < arraysize(parse_modes); j++) {
-- TestInstance* t = new TestInstance(regexp, kinds[i],
-- parse_modes[j].parse_flags);
-- error_ |= t->error();
-- v_.push_back(t);
-- }
-- }
--}
--
--Tester::~Tester() {
-- for (int i = 0; i < v_.size(); i++)
-- delete v_[i];
--}
--
--bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor) {
-- bool okay = true;
-- for (int i = 0; i < v_.size(); i++)
-- okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
-- return okay;
--}
--
--static Prog::Anchor anchors[] = {
-- Prog::kAnchored,
-- Prog::kUnanchored
--};
--
--bool Tester::TestInput(const StringPiece& text) {
-- bool okay = TestInputInContext(text, text);
-- if (text.size() > 0) {
-- StringPiece sp;
-- sp = text;
-- sp.remove_prefix(1);
-- okay &= TestInputInContext(sp, text);
-- sp = text;
-- sp.remove_suffix(1);
-- okay &= TestInputInContext(sp, text);
-- }
-- return okay;
--}
--
--bool Tester::TestInputInContext(const StringPiece& text,
-- const StringPiece& context) {
-- bool okay = true;
-- for (int i = 0; i < arraysize(anchors); i++)
-- okay &= TestCase(text, context, anchors[i]);
-- return okay;
--}
--
--bool TestRegexpOnText(const StringPiece& regexp,
-- const StringPiece& text) {
-- Tester t(regexp);
-- return t.TestInput(text);
--}
--
--} // namespace re2
-diff --git a/re2/re2/testing/tester.h b/re2/re2/testing/tester.h
-deleted file mode 100644
-index 6e16e77..0000000
---- a/re2/re2/testing/tester.h
-+++ /dev/null
-@@ -1,121 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Comparative tester for regular expression matching.
--// Checks all implementations against each other.
--
--#ifndef RE2_TESTING_TESTER_H__
--#define RE2_TESTING_TESTER_H__
--
--#include "re2/stringpiece.h"
--#include "re2/prog.h"
--#include "re2/regexp.h"
--#include "re2/re2.h"
--#include "util/pcre.h"
--
--namespace re2 {
--
--class Regexp;
--
--// All the supported regexp engines.
--enum Engine {
-- kEngineBacktrack = 0, // Prog::BadSearchBacktrack
-- kEngineNFA, // Prog::SearchNFA
-- kEngineDFA, // Prog::SearchDFA, only ask whether it matched
-- kEngineDFA1, // Prog::SearchDFA, ask for match[0]
-- kEngineOnePass, // Prog::SearchOnePass, if applicable
-- kEngineBitState, // Prog::SearchBitState
-- kEngineRE2, // RE2, all submatches
-- kEngineRE2a, // RE2, only ask for match[0]
-- kEngineRE2b, // RE2, only ask whether it matched
-- kEnginePCRE, // PCRE (util/pcre.h)
--
-- kEngineMax,
--};
--
--// Make normal math on the enum preserve the type.
--// By default, C++ doesn't define ++ on enum, and e+1 has type int.
--static inline void operator++(Engine& e, int unused) {
-- e = static_cast<Engine>(e+1);
--}
--
--static inline Engine operator+(Engine e, int i) {
-- return static_cast<Engine>(static_cast<int>(e)+i);
--}
--
--// A TestInstance caches per-regexp state for a given
--// regular expression in a given configuration
--// (UTF-8 vs Latin1, longest vs first match, etc.).
--class TestInstance {
-- public:
-- struct Result;
--
-- TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
-- Regexp::ParseFlags flags);
-- ~TestInstance();
-- Regexp::ParseFlags flags() { return flags_; }
-- bool error() { return error_; }
--
-- // Runs a single test case: search in text, which is in context,
-- // using the given anchoring.
-- bool RunCase(const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor);
--
-- private:
-- // Runs a single search using the named engine type.
-- void RunSearch(Engine type,
-- const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor,
-- Result *result);
--
-- void LogMatch(const char* prefix, Engine e, const StringPiece& text,
-- const StringPiece& context, Prog::Anchor anchor);
--
-- const StringPiece& regexp_str_; // regexp being tested
-- Prog::MatchKind kind_; // kind of match
-- Regexp::ParseFlags flags_; // flags for parsing regexp_str_
-- bool error_; // error during constructor?
--
-- Regexp* regexp_; // parsed regexp
-- int num_captures_; // regexp_->NumCaptures() cached
-- Prog* prog_; // compiled program
-- Prog* rprog_; // compiled reverse program
-- PCRE* re_; // PCRE implementation
-- RE2* re2_; // RE2 implementation
--
-- DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
--};
--
--// A group of TestInstances for all possible configurations.
--class Tester {
-- public:
-- explicit Tester(const StringPiece& regexp);
-- ~Tester();
--
-- bool error() { return error_; }
--
-- // Runs a single test case: search in text, which is in context,
-- // using the given anchoring.
-- bool TestCase(const StringPiece& text, const StringPiece& context,
-- Prog::Anchor anchor);
--
-- // Run TestCase(text, text, anchor) for all anchoring modes.
-- bool TestInput(const StringPiece& text);
--
-- // Run TestCase(text, context, anchor) for all anchoring modes.
-- bool TestInputInContext(const StringPiece& text, const StringPiece& context);
--
-- private:
-- bool error_;
-- vector<TestInstance*> v_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(Tester);
--};
--
--// Run all possible tests using regexp and text.
--bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
--
--} // namespace re2
--
--#endif // RE2_TESTING_TESTER_H__
-diff --git a/re2/re2/testing/unicode_test.py b/re2/re2/testing/unicode_test.py
-deleted file mode 100755
-index a88a3ad..0000000
---- a/re2/re2/testing/unicode_test.py
-+++ /dev/null
-@@ -1,207 +0,0 @@
--#!/usr/bin/python2.4
--#
--# Copyright 2008 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--"""Unittest for the util/regexp/re2/unicode.py module."""
--
--import os
--import StringIO
--from google3.pyglib import flags
--from google3.testing.pybase import googletest
--from google3.util.regexp.re2 import unicode
--
--_UNICODE_DIR = os.path.join(flags.FLAGS.test_srcdir, "google3", "third_party",
-- "unicode", "ucd-5.1.0")
--
--
--class ConvertTest(googletest.TestCase):
-- """Test the conversion functions."""
--
-- def testUInt(self):
-- self.assertEquals(0x0000, unicode._UInt("0000"))
-- self.assertEquals(0x263A, unicode._UInt("263A"))
-- self.assertEquals(0x10FFFF, unicode._UInt("10FFFF"))
-- self.assertRaises(unicode.InputError, unicode._UInt, "263")
-- self.assertRaises(unicode.InputError, unicode._UInt, "263AAAA")
-- self.assertRaises(unicode.InputError, unicode._UInt, "110000")
--
-- def testURange(self):
-- self.assertEquals([1, 2, 3], unicode._URange("0001..0003"))
-- self.assertEquals([1], unicode._URange("0001"))
-- self.assertRaises(unicode.InputError, unicode._URange, "0001..0003..0005")
-- self.assertRaises(unicode.InputError, unicode._URange, "0003..0001")
-- self.assertRaises(unicode.InputError, unicode._URange, "0001..0001")
--
-- def testUStr(self):
-- self.assertEquals("0x263A", unicode._UStr(0x263a))
-- self.assertEquals("0x10FFFF", unicode._UStr(0x10FFFF))
-- self.assertRaises(unicode.InputError, unicode._UStr, 0x110000)
-- self.assertRaises(unicode.InputError, unicode._UStr, -1)
--
--
--_UNICODE_TABLE = """# Commented line, should be ignored.
--# The next line is blank and should be ignored.
--
--0041;Capital A;Line 1
--0061..007A;Lowercase;Line 2
--1F00;<Greek, First>;Ignored
--1FFE;<Greek, Last>;Line 3
--10FFFF;Runemax;Line 4
--0000;Zero;Line 5
--"""
--
--_BAD_TABLE1 = """
--111111;Not a code point;
--"""
--
--_BAD_TABLE2 = """
--0000;<Zero, First>;Missing <Zero, Last>
--"""
--
--_BAD_TABLE3 = """
--0010..0001;Bad range;
--"""
--
--
--class AbortError(Exception):
-- """Function should not have been called."""
--
--
--def Abort():
-- raise AbortError("Abort")
--
--
--def StringTable(s, n, f):
-- unicode.ReadUnicodeTable(StringIO.StringIO(s), n, f)
--
--
--class ReadUnicodeTableTest(googletest.TestCase):
-- """Test the ReadUnicodeTable function."""
--
-- def testSimpleTable(self):
--
-- ncall = [0] # can't assign to ordinary int in DoLine
--
-- def DoLine(codes, fields):
-- self.assertEquals(3, len(fields))
-- ncall[0] += 1
-- self.assertEquals("Line %d" % (ncall[0],), fields[2])
-- if ncall[0] == 1:
-- self.assertEquals([0x0041], codes)
-- self.assertEquals("0041", fields[0])
-- self.assertEquals("Capital A", fields[1])
-- elif ncall[0] == 2:
-- self.assertEquals(range(0x0061, 0x007A + 1), codes)
-- self.assertEquals("0061..007A", fields[0])
-- self.assertEquals("Lowercase", fields[1])
-- elif ncall[0] == 3:
-- self.assertEquals(range(0x1F00, 0x1FFE + 1), codes)
-- self.assertEquals("1F00..1FFE", fields[0])
-- self.assertEquals("Greek", fields[1])
-- elif ncall[0] == 4:
-- self.assertEquals([0x10FFFF], codes)
-- self.assertEquals("10FFFF", fields[0])
-- self.assertEquals("Runemax", fields[1])
-- elif ncall[0] == 5:
-- self.assertEquals([0x0000], codes)
-- self.assertEquals("0000", fields[0])
-- self.assertEquals("Zero", fields[1])
--
-- StringTable(_UNICODE_TABLE, 3, DoLine)
-- self.assertEquals(5, ncall[0])
--
-- def testErrorTables(self):
-- self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 4, Abort)
-- self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 2, Abort)
-- self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE1, 3, Abort)
-- self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE2, 3, Abort)
-- self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE3, 3, Abort)
--
--
--class ParseContinueTest(googletest.TestCase):
-- """Test the ParseContinue function."""
--
-- def testParseContinue(self):
-- self.assertEquals(("Private Use", "First"),
-- unicode._ParseContinue("<Private Use, First>"))
-- self.assertEquals(("Private Use", "Last"),
-- unicode._ParseContinue("<Private Use, Last>"))
-- self.assertEquals(("<Private Use, Blah>", None),
-- unicode._ParseContinue("<Private Use, Blah>"))
--
--
--class CaseGroupsTest(googletest.TestCase):
-- """Test the CaseGroups function (and the CaseFoldingReader)."""
--
-- def FindGroup(self, c):
-- if type(c) == str:
-- c = ord(c)
-- for g in self.groups:
-- if c in g:
-- return g
-- return None
--
-- def testCaseGroups(self):
-- self.groups = unicode.CaseGroups(unicode_dir=_UNICODE_DIR)
-- self.assertEquals([ord("A"), ord("a")], self.FindGroup("a"))
-- self.assertEquals(None, self.FindGroup("0"))
--
--
--class ScriptsTest(googletest.TestCase):
-- """Test the Scripts function (and the ScriptsReader)."""
--
-- def FindScript(self, c):
-- if type(c) == str:
-- c = ord(c)
-- for script, codes in self.scripts.items():
-- for code in codes:
-- if c == code:
-- return script
-- return None
--
-- def testScripts(self):
-- self.scripts = unicode.Scripts(unicode_dir=_UNICODE_DIR)
-- self.assertEquals("Latin", self.FindScript("a"))
-- self.assertEquals("Common", self.FindScript("0"))
-- self.assertEquals(None, self.FindScript(0xFFFE))
--
--
--class CategoriesTest(googletest.TestCase):
-- """Test the Categories function (and the UnicodeDataReader)."""
--
-- def FindCategory(self, c):
-- if type(c) == str:
-- c = ord(c)
-- short = None
-- for category, codes in self.categories.items():
-- for code in codes:
-- if code == c:
-- # prefer category Nd over N
-- if len(category) > 1:
-- return category
-- if short == None:
-- short = category
-- return short
--
-- def testCategories(self):
-- self.categories = unicode.Categories(unicode_dir=_UNICODE_DIR)
-- self.assertEquals("Ll", self.FindCategory("a"))
-- self.assertEquals("Nd", self.FindCategory("0"))
-- self.assertEquals("Lo", self.FindCategory(0xAD00)) # in First, Last range
-- self.assertEquals(None, self.FindCategory(0xFFFE))
-- self.assertEquals("Lo", self.FindCategory(0x8B5A))
-- self.assertEquals("Lo", self.FindCategory(0x6C38))
-- self.assertEquals("Lo", self.FindCategory(0x92D2))
-- self.assertTrue(ord("a") in self.categories["L"])
-- self.assertTrue(ord("0") in self.categories["N"])
-- self.assertTrue(0x8B5A in self.categories["L"])
-- self.assertTrue(0x6C38 in self.categories["L"])
-- self.assertTrue(0x92D2 in self.categories["L"])
--
--def main():
-- googletest.main()
--
--if __name__ == "__main__":
-- main()
-diff --git a/re2/re2/tostring.cc b/re2/re2/tostring.cc
-deleted file mode 100644
-index 555524f..0000000
---- a/re2/re2/tostring.cc
-+++ /dev/null
-@@ -1,341 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Format a regular expression structure as a string.
--// Tested by parse_test.cc
--
--#include "util/util.h"
--#include "re2/regexp.h"
--#include "re2/walker-inl.h"
--
--namespace re2 {
--
--enum {
-- PrecAtom,
-- PrecUnary,
-- PrecConcat,
-- PrecAlternate,
-- PrecEmpty,
-- PrecParen,
-- PrecToplevel,
--};
--
--// Helper function. See description below.
--static void AppendCCRange(string* t, Rune lo, Rune hi);
--
--// Walker to generate string in s_.
--// The arg pointers are actually integers giving the
--// context precedence.
--// The child_args are always NULL.
--class ToStringWalker : public Regexp::Walker<int> {
-- public:
-- explicit ToStringWalker(string* t) : t_(t) {}
--
-- virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
-- virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
-- int* child_args, int nchild_args);
-- virtual int ShortVisit(Regexp* re, int parent_arg) {
-- return 0;
-- }
--
-- private:
-- string* t_; // The string the walker appends to.
--
-- DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker);
--};
--
--string Regexp::ToString() {
-- string t;
-- ToStringWalker w(&t);
-- w.WalkExponential(this, PrecToplevel, 100000);
-- if (w.stopped_early())
-- t += " [truncated]";
-- return t;
--}
--
--#define ToString DontCallToString // Avoid accidental recursion.
--
--// Visits re before children are processed.
--// Appends ( if needed and passes new precedence to children.
--int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
-- int prec = parent_arg;
-- int nprec = PrecAtom;
--
-- switch (re->op()) {
-- case kRegexpNoMatch:
-- case kRegexpEmptyMatch:
-- case kRegexpLiteral:
-- case kRegexpAnyChar:
-- case kRegexpAnyByte:
-- case kRegexpBeginLine:
-- case kRegexpEndLine:
-- case kRegexpBeginText:
-- case kRegexpEndText:
-- case kRegexpWordBoundary:
-- case kRegexpNoWordBoundary:
-- case kRegexpCharClass:
-- case kRegexpHaveMatch:
-- nprec = PrecAtom;
-- break;
--
-- case kRegexpConcat:
-- case kRegexpLiteralString:
-- if (prec < PrecConcat)
-- t_->append("(?:");
-- nprec = PrecConcat;
-- break;
--
-- case kRegexpAlternate:
-- if (prec < PrecAlternate)
-- t_->append("(?:");
-- nprec = PrecAlternate;
-- break;
--
-- case kRegexpCapture:
-- t_->append("(");
-- if (re->name()) {
-- t_->append("?P<");
-- t_->append(*re->name());
-- t_->append(">");
-- }
-- nprec = PrecParen;
-- break;
--
-- case kRegexpStar:
-- case kRegexpPlus:
-- case kRegexpQuest:
-- case kRegexpRepeat:
-- if (prec < PrecUnary)
-- t_->append("(?:");
-- // The subprecedence here is PrecAtom instead of PrecUnary
-- // because PCRE treats two unary ops in a row as a parse error.
-- nprec = PrecAtom;
-- break;
-- }
--
-- return nprec;
--}
--
--static void AppendLiteral(string *t, Rune r, bool foldcase) {
-- if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
-- t->append(1, '\\');
-- t->append(1, r);
-- } else if (foldcase && 'a' <= r && r <= 'z') {
-- if ('a' <= r && r <= 'z')
-- r += 'A' - 'a';
-- t->append(1, '[');
-- t->append(1, r);
-- t->append(1, r + 'a' - 'A');
-- t->append(1, ']');
-- } else {
-- AppendCCRange(t, r, r);
-- }
--}
--
--// Visits re after children are processed.
--// For childless regexps, all the work is done here.
--// For regexps with children, append any unary suffixes or ).
--int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
-- int* child_args, int nchild_args) {
-- int prec = parent_arg;
-- switch (re->op()) {
-- case kRegexpNoMatch:
-- // There's no simple symbol for "no match", but
-- // [^0-Runemax] excludes everything.
-- t_->append("[^\\x00-\\x{10ffff}]");
-- break;
--
-- case kRegexpEmptyMatch:
-- // Append (?:) to make empty string visible,
-- // unless this is already being parenthesized.
-- if (prec < PrecEmpty)
-- t_->append("(?:)");
-- break;
--
-- case kRegexpLiteral:
-- AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
-- break;
--
-- case kRegexpLiteralString:
-- for (int i = 0; i < re->nrunes(); i++)
-- AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
-- if (prec < PrecConcat)
-- t_->append(")");
-- break;
--
-- case kRegexpConcat:
-- if (prec < PrecConcat)
-- t_->append(")");
-- break;
--
-- case kRegexpAlternate:
-- // Clumsy but workable: the children all appended |
-- // at the end of their strings, so just remove the last one.
-- if ((*t_)[t_->size()-1] == '|')
-- t_->erase(t_->size()-1);
-- else
-- LOG(DFATAL) << "Bad final char: " << t_;
-- if (prec < PrecAlternate)
-- t_->append(")");
-- break;
--
-- case kRegexpStar:
-- t_->append("*");
-- if (re->parse_flags() & Regexp::NonGreedy)
-- t_->append("?");
-- if (prec < PrecUnary)
-- t_->append(")");
-- break;
--
-- case kRegexpPlus:
-- t_->append("+");
-- if (re->parse_flags() & Regexp::NonGreedy)
-- t_->append("?");
-- if (prec < PrecUnary)
-- t_->append(")");
-- break;
--
-- case kRegexpQuest:
-- t_->append("?");
-- if (re->parse_flags() & Regexp::NonGreedy)
-- t_->append("?");
-- if (prec < PrecUnary)
-- t_->append(")");
-- break;
--
-- case kRegexpRepeat:
-- if (re->max() == -1)
-- t_->append(StringPrintf("{%d,}", re->min()));
-- else if (re->min() == re->max())
-- t_->append(StringPrintf("{%d}", re->min()));
-- else
-- t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
-- if (re->parse_flags() & Regexp::NonGreedy)
-- t_->append("?");
-- if (prec < PrecUnary)
-- t_->append(")");
-- break;
--
-- case kRegexpAnyChar:
-- t_->append(".");
-- break;
--
-- case kRegexpAnyByte:
-- t_->append("\\C");
-- break;
--
-- case kRegexpBeginLine:
-- t_->append("^");
-- break;
--
-- case kRegexpEndLine:
-- t_->append("$");
-- break;
--
-- case kRegexpBeginText:
-- t_->append("(?-m:^)");
-- break;
--
-- case kRegexpEndText:
-- if (re->parse_flags() & Regexp::WasDollar)
-- t_->append("(?-m:$)");
-- else
-- t_->append("\\z");
-- break;
--
-- case kRegexpWordBoundary:
-- t_->append("\\b");
-- break;
--
-- case kRegexpNoWordBoundary:
-- t_->append("\\B");
-- break;
--
-- case kRegexpCharClass: {
-- if (re->cc()->size() == 0) {
-- t_->append("[^\\x00-\\x{10ffff}]");
-- break;
-- }
-- t_->append("[");
-- // Heuristic: show class as negated if it contains the
-- // non-character 0xFFFE.
-- CharClass* cc = re->cc();
-- if (cc->Contains(0xFFFE)) {
-- cc = cc->Negate();
-- t_->append("^");
-- }
-- for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
-- AppendCCRange(t_, i->lo, i->hi);
-- if (cc != re->cc())
-- cc->Delete();
-- t_->append("]");
-- break;
-- }
--
-- case kRegexpCapture:
-- t_->append(")");
-- break;
--
-- case kRegexpHaveMatch:
-- // There's no syntax accepted by the parser to generate
-- // this node (it is generated by RE2::Set) so make something
-- // up that is readable but won't compile.
-- t_->append("(?HaveMatch:%d)", re->match_id());
-- break;
-- }
--
-- // If the parent is an alternation, append the | for it.
-- if (prec == PrecAlternate)
-- t_->append("|");
--
-- return 0;
--}
--
--// Appends a rune for use in a character class to the string t.
--static void AppendCCChar(string* t, Rune r) {
-- if (0x20 <= r && r <= 0x7E) {
-- if (strchr("[]^-\\", r))
-- t->append("\\");
-- t->append(1, r);
-- return;
-- }
-- switch (r) {
-- default:
-- break;
--
-- case '\r':
-- t->append("\\r");
-- return;
--
-- case '\t':
-- t->append("\\t");
-- return;
--
-- case '\n':
-- t->append("\\n");
-- return;
--
-- case '\f':
-- t->append("\\f");
-- return;
-- }
--
-- if (r < 0x100) {
-- StringAppendF(t, "\\x%02x", static_cast<int>(r));
-- return;
-- }
-- StringAppendF(t, "\\x{%x}", static_cast<int>(r));
--}
--
--static void AppendCCRange(string* t, Rune lo, Rune hi) {
-- if (lo > hi)
-- return;
-- AppendCCChar(t, lo);
-- if (lo < hi) {
-- t->append("-");
-- AppendCCChar(t, hi);
-- }
--}
--
--} // namespace re2
-diff --git a/re2/re2/unicode.py b/re2/re2/unicode.py
-deleted file mode 100755
-index 8d78312..0000000
---- a/re2/re2/unicode.py
-+++ /dev/null
-@@ -1,297 +0,0 @@
--# Copyright 2008 The RE2 Authors. All Rights Reserved.
--# Use of this source code is governed by a BSD-style
--# license that can be found in the LICENSE file.
--
--"""Parser for Unicode data files (as distributed by unicode.org)."""
--
--import os
--import re
--import urllib2
--
--# Directory or URL where Unicode tables reside.
--_UNICODE_DIR = "http://www.unicode.org/Public/6.0.0/ucd"
--
--# Largest valid Unicode code value.
--_RUNE_MAX = 0x10FFFF
--
--
--class Error(Exception):
-- """Unicode error base class."""
--
--
--class InputError(Error):
-- """Unicode input error class. Raised on invalid input."""
--
--
--def _UInt(s):
-- """Converts string to Unicode code point ('263A' => 0x263a).
--
-- Args:
-- s: string to convert
--
-- Returns:
-- Unicode code point
--
-- Raises:
-- InputError: the string is not a valid Unicode value.
-- """
--
-- try:
-- v = int(s, 16)
-- except ValueError:
-- v = -1
-- if len(s) < 4 or len(s) > 6 or v < 0 or v > _RUNE_MAX:
-- raise InputError("invalid Unicode value %s" % (s,))
-- return v
--
--
--def _URange(s):
-- """Converts string to Unicode range.
--
-- '0001..0003' => [1, 2, 3].
-- '0001' => [1].
--
-- Args:
-- s: string to convert
--
-- Returns:
-- Unicode range
--
-- Raises:
-- InputError: the string is not a valid Unicode range.
-- """
-- a = s.split("..")
-- if len(a) == 1:
-- return [_UInt(a[0])]
-- if len(a) == 2:
-- lo = _UInt(a[0])
-- hi = _UInt(a[1])
-- if lo < hi:
-- return range(lo, hi + 1)
-- raise InputError("invalid Unicode range %s" % (s,))
--
--
--def _UStr(v):
-- """Converts Unicode code point to hex string.
--
-- 0x263a => '0x263A'.
--
-- Args:
-- v: code point to convert
--
-- Returns:
-- Unicode string
--
-- Raises:
-- InputError: the argument is not a valid Unicode value.
-- """
-- if v < 0 or v > _RUNE_MAX:
-- raise InputError("invalid Unicode value %s" % (v,))
-- return "0x%04X" % (v,)
--
--
--def _ParseContinue(s):
-- """Parses a Unicode continuation field.
--
-- These are of the form '<Name, First>' or '<Name, Last>'.
-- Instead of giving an explicit range in a single table entry,
-- some Unicode tables use two entries, one for the first
-- code value in the range and one for the last.
-- The first entry's description is '<Name, First>' instead of 'Name'
-- and the second is '<Name, Last>'.
--
-- '<Name, First>' => ('Name', 'First')
-- '<Name, Last>' => ('Name', 'Last')
-- 'Anything else' => ('Anything else', None)
--
-- Args:
-- s: continuation field string
--
-- Returns:
-- pair: name and ('First', 'Last', or None)
-- """
--
-- match = re.match("<(.*), (First|Last)>", s)
-- if match is not None:
-- return match.groups()
-- return (s, None)
--
--
--def ReadUnicodeTable(filename, nfields, doline):
-- """Generic Unicode table text file reader.
--
-- The reader takes care of stripping out comments and also
-- parsing the two different ways that the Unicode tables specify
-- code ranges (using the .. notation and splitting the range across
-- multiple lines).
--
-- Each non-comment line in the table is expected to have the given
-- number of fields. The first field is known to be the Unicode value
-- and the second field its description.
--
-- The reader calls doline(codes, fields) for each entry in the table.
-- If fn raises an exception, the reader prints that exception,
-- prefixed with the file name and line number, and continues
-- processing the file. When done with the file, the reader re-raises
-- the first exception encountered during the file.
--
-- Arguments:
-- filename: the Unicode data file to read, or a file-like object.
-- nfields: the number of expected fields per line in that file.
-- doline: the function to call for each table entry.
--
-- Raises:
-- InputError: nfields is invalid (must be >= 2).
-- """
--
-- if nfields < 2:
-- raise InputError("invalid number of fields %d" % (nfields,))
--
-- if type(filename) == str:
-- if filename.startswith("http://"):
-- fil = urllib2.urlopen(filename)
-- else:
-- fil = open(filename, "r")
-- else:
-- fil = filename
--
-- first = None # first code in multiline range
-- expect_last = None # tag expected for "Last" line in multiline range
-- lineno = 0 # current line number
-- for line in fil:
-- lineno += 1
-- try:
-- # Chop # comments and white space; ignore empty lines.
-- sharp = line.find("#")
-- if sharp >= 0:
-- line = line[:sharp]
-- line = line.strip()
-- if not line:
-- continue
--
-- # Split fields on ";", chop more white space.
-- # Must have the expected number of fields.
-- fields = [s.strip() for s in line.split(";")]
-- if len(fields) != nfields:
-- raise InputError("wrong number of fields %d %d - %s" %
-- (len(fields), nfields, line))
--
-- # The Unicode text files have two different ways
-- # to list a Unicode range. Either the first field is
-- # itself a range (0000..FFFF), or the range is split
-- # across two lines, with the second field noting
-- # the continuation.
-- codes = _URange(fields[0])
-- (name, cont) = _ParseContinue(fields[1])
--
-- if expect_last is not None:
-- # If the last line gave the First code in a range,
-- # this one had better give the Last one.
-- if (len(codes) != 1 or codes[0] <= first or
-- cont != "Last" or name != expect_last):
-- raise InputError("expected Last line for %s" %
-- (expect_last,))
-- codes = range(first, codes[0] + 1)
-- first = None
-- expect_last = None
-- fields[0] = "%04X..%04X" % (codes[0], codes[-1])
-- fields[1] = name
-- elif cont == "First":
-- # Otherwise, if this is the First code in a range,
-- # remember it and go to the next line.
-- if len(codes) != 1:
-- raise InputError("bad First line: range given")
-- expect_last = name
-- first = codes[0]
-- continue
--
-- doline(codes, fields)
--
-- except Exception, e:
-- print "%s:%d: %s" % (filename, lineno, e)
-- raise
--
-- if expect_last is not None:
-- raise InputError("expected Last line for %s; got EOF" %
-- (expect_last,))
--
--
--def CaseGroups(unicode_dir=_UNICODE_DIR):
-- """Returns list of Unicode code groups equivalent under case folding.
--
-- Each group is a sorted list of code points,
-- and the list of groups is sorted by first code point
-- in the group.
--
-- Args:
-- unicode_dir: Unicode data directory
--
-- Returns:
-- list of Unicode code groups
-- """
--
-- # Dict mapping lowercase code point to fold-equivalent group.
-- togroup = {}
--
-- def DoLine(codes, fields):
-- """Process single CaseFolding.txt line, updating togroup."""
-- (_, foldtype, lower, _) = fields
-- if foldtype not in ("C", "S"):
-- return
-- lower = _UInt(lower)
-- togroup.setdefault(lower, [lower]).extend(codes)
--
-- ReadUnicodeTable(unicode_dir+"/CaseFolding.txt", 4, DoLine)
--
-- groups = togroup.values()
-- for g in groups:
-- g.sort()
-- groups.sort()
-- return togroup, groups
--
--
--def Scripts(unicode_dir=_UNICODE_DIR):
-- """Returns dict mapping script names to code lists.
--
-- Args:
-- unicode_dir: Unicode data directory
--
-- Returns:
-- dict mapping script names to code lists
-- """
--
-- scripts = {}
--
-- def DoLine(codes, fields):
-- """Process single Scripts.txt line, updating scripts."""
-- (_, name) = fields
-- scripts.setdefault(name, []).extend(codes)
--
-- ReadUnicodeTable(unicode_dir+"/Scripts.txt", 2, DoLine)
-- return scripts
--
--
--def Categories(unicode_dir=_UNICODE_DIR):
-- """Returns dict mapping category names to code lists.
--
-- Args:
-- unicode_dir: Unicode data directory
--
-- Returns:
-- dict mapping category names to code lists
-- """
--
-- categories = {}
--
-- def DoLine(codes, fields):
-- """Process single UnicodeData.txt line, updating categories."""
-- category = fields[2]
-- categories.setdefault(category, []).extend(codes)
-- # Add codes from Lu into L, etc.
-- if len(category) > 1:
-- short = category[0]
-- categories.setdefault(short, []).extend(codes)
--
-- ReadUnicodeTable(unicode_dir+"/UnicodeData.txt", 15, DoLine)
-- return categories
--
-diff --git a/re2/re2/unicode_casefold.cc b/re2/re2/unicode_casefold.cc
-deleted file mode 100644
-index 6d4e878..0000000
---- a/re2/re2/unicode_casefold.cc
-+++ /dev/null
-@@ -1,469 +0,0 @@
--
--// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
--// make_unicode_casefold.py >unicode_casefold.cc
--
--#include "re2/unicode_casefold.h"
--
--namespace re2 {
--
--
--// 1029 groups, 2079 pairs, 282 ranges
--CaseFold unicode_casefold[] = {
-- { 65, 90, 32 },
-- { 97, 106, -32 },
-- { 107, 107, 8383 },
-- { 108, 114, -32 },
-- { 115, 115, 268 },
-- { 116, 122, -32 },
-- { 181, 181, 743 },
-- { 192, 214, 32 },
-- { 216, 222, 32 },
-- { 223, 223, 7615 },
-- { 224, 228, -32 },
-- { 229, 229, 8262 },
-- { 230, 246, -32 },
-- { 248, 254, -32 },
-- { 255, 255, 121 },
-- { 256, 303, EvenOdd },
-- { 306, 311, EvenOdd },
-- { 313, 328, OddEven },
-- { 330, 375, EvenOdd },
-- { 376, 376, -121 },
-- { 377, 382, OddEven },
-- { 383, 383, -300 },
-- { 384, 384, 195 },
-- { 385, 385, 210 },
-- { 386, 389, EvenOdd },
-- { 390, 390, 206 },
-- { 391, 392, OddEven },
-- { 393, 394, 205 },
-- { 395, 396, OddEven },
-- { 398, 398, 79 },
-- { 399, 399, 202 },
-- { 400, 400, 203 },
-- { 401, 402, OddEven },
-- { 403, 403, 205 },
-- { 404, 404, 207 },
-- { 405, 405, 97 },
-- { 406, 406, 211 },
-- { 407, 407, 209 },
-- { 408, 409, EvenOdd },
-- { 410, 410, 163 },
-- { 412, 412, 211 },
-- { 413, 413, 213 },
-- { 414, 414, 130 },
-- { 415, 415, 214 },
-- { 416, 421, EvenOdd },
-- { 422, 422, 218 },
-- { 423, 424, OddEven },
-- { 425, 425, 218 },
-- { 428, 429, EvenOdd },
-- { 430, 430, 218 },
-- { 431, 432, OddEven },
-- { 433, 434, 217 },
-- { 435, 438, OddEven },
-- { 439, 439, 219 },
-- { 440, 441, EvenOdd },
-- { 444, 445, EvenOdd },
-- { 447, 447, 56 },
-- { 452, 452, EvenOdd },
-- { 453, 453, OddEven },
-- { 454, 454, -2 },
-- { 455, 455, OddEven },
-- { 456, 456, EvenOdd },
-- { 457, 457, -2 },
-- { 458, 458, EvenOdd },
-- { 459, 459, OddEven },
-- { 460, 460, -2 },
-- { 461, 476, OddEven },
-- { 477, 477, -79 },
-- { 478, 495, EvenOdd },
-- { 497, 497, OddEven },
-- { 498, 498, EvenOdd },
-- { 499, 499, -2 },
-- { 500, 501, EvenOdd },
-- { 502, 502, -97 },
-- { 503, 503, -56 },
-- { 504, 543, EvenOdd },
-- { 544, 544, -130 },
-- { 546, 563, EvenOdd },
-- { 570, 570, 10795 },
-- { 571, 572, OddEven },
-- { 573, 573, -163 },
-- { 574, 574, 10792 },
-- { 575, 576, 10815 },
-- { 577, 578, OddEven },
-- { 579, 579, -195 },
-- { 580, 580, 69 },
-- { 581, 581, 71 },
-- { 582, 591, EvenOdd },
-- { 592, 592, 10783 },
-- { 593, 593, 10780 },
-- { 594, 594, 10782 },
-- { 595, 595, -210 },
-- { 596, 596, -206 },
-- { 598, 599, -205 },
-- { 601, 601, -202 },
-- { 603, 603, -203 },
-- { 608, 608, -205 },
-- { 611, 611, -207 },
-- { 613, 613, 42280 },
-- { 616, 616, -209 },
-- { 617, 617, -211 },
-- { 619, 619, 10743 },
-- { 623, 623, -211 },
-- { 625, 625, 10749 },
-- { 626, 626, -213 },
-- { 629, 629, -214 },
-- { 637, 637, 10727 },
-- { 640, 640, -218 },
-- { 643, 643, -218 },
-- { 648, 648, -218 },
-- { 649, 649, -69 },
-- { 650, 651, -217 },
-- { 652, 652, -71 },
-- { 658, 658, -219 },
-- { 837, 837, 84 },
-- { 880, 883, EvenOdd },
-- { 886, 887, EvenOdd },
-- { 891, 893, 130 },
-- { 902, 902, 38 },
-- { 904, 906, 37 },
-- { 908, 908, 64 },
-- { 910, 911, 63 },
-- { 913, 929, 32 },
-- { 931, 931, 31 },
-- { 932, 939, 32 },
-- { 940, 940, -38 },
-- { 941, 943, -37 },
-- { 945, 945, -32 },
-- { 946, 946, 30 },
-- { 947, 948, -32 },
-- { 949, 949, 64 },
-- { 950, 951, -32 },
-- { 952, 952, 25 },
-- { 953, 953, 7173 },
-- { 954, 954, 54 },
-- { 955, 955, -32 },
-- { 956, 956, -775 },
-- { 957, 959, -32 },
-- { 960, 960, 22 },
-- { 961, 961, 48 },
-- { 962, 962, EvenOdd },
-- { 963, 965, -32 },
-- { 966, 966, 15 },
-- { 967, 968, -32 },
-- { 969, 969, 7517 },
-- { 970, 971, -32 },
-- { 972, 972, -64 },
-- { 973, 974, -63 },
-- { 975, 975, 8 },
-- { 976, 976, -62 },
-- { 977, 977, 35 },
-- { 981, 981, -47 },
-- { 982, 982, -54 },
-- { 983, 983, -8 },
-- { 984, 1007, EvenOdd },
-- { 1008, 1008, -86 },
-- { 1009, 1009, -80 },
-- { 1010, 1010, 7 },
-- { 1012, 1012, -92 },
-- { 1013, 1013, -96 },
-- { 1015, 1016, OddEven },
-- { 1017, 1017, -7 },
-- { 1018, 1019, EvenOdd },
-- { 1021, 1023, -130 },
-- { 1024, 1039, 80 },
-- { 1040, 1071, 32 },
-- { 1072, 1103, -32 },
-- { 1104, 1119, -80 },
-- { 1120, 1153, EvenOdd },
-- { 1162, 1215, EvenOdd },
-- { 1216, 1216, 15 },
-- { 1217, 1230, OddEven },
-- { 1231, 1231, -15 },
-- { 1232, 1319, EvenOdd },
-- { 1329, 1366, 48 },
-- { 1377, 1414, -48 },
-- { 4256, 4293, 7264 },
-- { 7545, 7545, 35332 },
-- { 7549, 7549, 3814 },
-- { 7680, 7776, EvenOdd },
-- { 7777, 7777, 58 },
-- { 7778, 7829, EvenOdd },
-- { 7835, 7835, -59 },
-- { 7838, 7838, -7615 },
-- { 7840, 7935, EvenOdd },
-- { 7936, 7943, 8 },
-- { 7944, 7951, -8 },
-- { 7952, 7957, 8 },
-- { 7960, 7965, -8 },
-- { 7968, 7975, 8 },
-- { 7976, 7983, -8 },
-- { 7984, 7991, 8 },
-- { 7992, 7999, -8 },
-- { 8000, 8005, 8 },
-- { 8008, 8013, -8 },
-- { 8017, 8017, 8 },
-- { 8019, 8019, 8 },
-- { 8021, 8021, 8 },
-- { 8023, 8023, 8 },
-- { 8025, 8025, -8 },
-- { 8027, 8027, -8 },
-- { 8029, 8029, -8 },
-- { 8031, 8031, -8 },
-- { 8032, 8039, 8 },
-- { 8040, 8047, -8 },
-- { 8048, 8049, 74 },
-- { 8050, 8053, 86 },
-- { 8054, 8055, 100 },
-- { 8056, 8057, 128 },
-- { 8058, 8059, 112 },
-- { 8060, 8061, 126 },
-- { 8064, 8071, 8 },
-- { 8072, 8079, -8 },
-- { 8080, 8087, 8 },
-- { 8088, 8095, -8 },
-- { 8096, 8103, 8 },
-- { 8104, 8111, -8 },
-- { 8112, 8113, 8 },
-- { 8115, 8115, 9 },
-- { 8120, 8121, -8 },
-- { 8122, 8123, -74 },
-- { 8124, 8124, -9 },
-- { 8126, 8126, -7289 },
-- { 8131, 8131, 9 },
-- { 8136, 8139, -86 },
-- { 8140, 8140, -9 },
-- { 8144, 8145, 8 },
-- { 8152, 8153, -8 },
-- { 8154, 8155, -100 },
-- { 8160, 8161, 8 },
-- { 8165, 8165, 7 },
-- { 8168, 8169, -8 },
-- { 8170, 8171, -112 },
-- { 8172, 8172, -7 },
-- { 8179, 8179, 9 },
-- { 8184, 8185, -128 },
-- { 8186, 8187, -126 },
-- { 8188, 8188, -9 },
-- { 8486, 8486, -7549 },
-- { 8490, 8490, -8415 },
-- { 8491, 8491, -8294 },
-- { 8498, 8498, 28 },
-- { 8526, 8526, -28 },
-- { 8544, 8559, 16 },
-- { 8560, 8575, -16 },
-- { 8579, 8580, OddEven },
-- { 9398, 9423, 26 },
-- { 9424, 9449, -26 },
-- { 11264, 11310, 48 },
-- { 11312, 11358, -48 },
-- { 11360, 11361, EvenOdd },
-- { 11362, 11362, -10743 },
-- { 11363, 11363, -3814 },
-- { 11364, 11364, -10727 },
-- { 11365, 11365, -10795 },
-- { 11366, 11366, -10792 },
-- { 11367, 11372, OddEven },
-- { 11373, 11373, -10780 },
-- { 11374, 11374, -10749 },
-- { 11375, 11375, -10783 },
-- { 11376, 11376, -10782 },
-- { 11378, 11379, EvenOdd },
-- { 11381, 11382, OddEven },
-- { 11390, 11391, -10815 },
-- { 11392, 11491, EvenOdd },
-- { 11499, 11502, OddEven },
-- { 11520, 11557, -7264 },
-- { 42560, 42605, EvenOdd },
-- { 42624, 42647, EvenOdd },
-- { 42786, 42799, EvenOdd },
-- { 42802, 42863, EvenOdd },
-- { 42873, 42876, OddEven },
-- { 42877, 42877, -35332 },
-- { 42878, 42887, EvenOdd },
-- { 42891, 42892, OddEven },
-- { 42893, 42893, -42280 },
-- { 42896, 42897, EvenOdd },
-- { 42912, 42921, EvenOdd },
-- { 65313, 65338, 32 },
-- { 65345, 65370, -32 },
-- { 66560, 66599, 40 },
-- { 66600, 66639, -40 },
--};
--int num_unicode_casefold = 282;
--
--// 1029 groups, 1050 pairs, 163 ranges
--CaseFold unicode_tolower[] = {
-- { 65, 90, 32 },
-- { 181, 181, 775 },
-- { 192, 214, 32 },
-- { 216, 222, 32 },
-- { 256, 302, EvenOddSkip },
-- { 306, 310, EvenOddSkip },
-- { 313, 327, OddEvenSkip },
-- { 330, 374, EvenOddSkip },
-- { 376, 376, -121 },
-- { 377, 381, OddEvenSkip },
-- { 383, 383, -268 },
-- { 385, 385, 210 },
-- { 386, 388, EvenOddSkip },
-- { 390, 390, 206 },
-- { 391, 391, OddEven },
-- { 393, 394, 205 },
-- { 395, 395, OddEven },
-- { 398, 398, 79 },
-- { 399, 399, 202 },
-- { 400, 400, 203 },
-- { 401, 401, OddEven },
-- { 403, 403, 205 },
-- { 404, 404, 207 },
-- { 406, 406, 211 },
-- { 407, 407, 209 },
-- { 408, 408, EvenOdd },
-- { 412, 412, 211 },
-- { 413, 413, 213 },
-- { 415, 415, 214 },
-- { 416, 420, EvenOddSkip },
-- { 422, 422, 218 },
-- { 423, 423, OddEven },
-- { 425, 425, 218 },
-- { 428, 428, EvenOdd },
-- { 430, 430, 218 },
-- { 431, 431, OddEven },
-- { 433, 434, 217 },
-- { 435, 437, OddEvenSkip },
-- { 439, 439, 219 },
-- { 440, 440, EvenOdd },
-- { 444, 444, EvenOdd },
-- { 452, 452, 2 },
-- { 453, 453, OddEven },
-- { 455, 455, 2 },
-- { 456, 456, EvenOdd },
-- { 458, 458, 2 },
-- { 459, 475, OddEvenSkip },
-- { 478, 494, EvenOddSkip },
-- { 497, 497, 2 },
-- { 498, 500, EvenOddSkip },
-- { 502, 502, -97 },
-- { 503, 503, -56 },
-- { 504, 542, EvenOddSkip },
-- { 544, 544, -130 },
-- { 546, 562, EvenOddSkip },
-- { 570, 570, 10795 },
-- { 571, 571, OddEven },
-- { 573, 573, -163 },
-- { 574, 574, 10792 },
-- { 577, 577, OddEven },
-- { 579, 579, -195 },
-- { 580, 580, 69 },
-- { 581, 581, 71 },
-- { 582, 590, EvenOddSkip },
-- { 837, 837, 116 },
-- { 880, 882, EvenOddSkip },
-- { 886, 886, EvenOdd },
-- { 902, 902, 38 },
-- { 904, 906, 37 },
-- { 908, 908, 64 },
-- { 910, 911, 63 },
-- { 913, 929, 32 },
-- { 931, 939, 32 },
-- { 962, 962, EvenOdd },
-- { 975, 975, 8 },
-- { 976, 976, -30 },
-- { 977, 977, -25 },
-- { 981, 981, -15 },
-- { 982, 982, -22 },
-- { 984, 1006, EvenOddSkip },
-- { 1008, 1008, -54 },
-- { 1009, 1009, -48 },
-- { 1012, 1012, -60 },
-- { 1013, 1013, -64 },
-- { 1015, 1015, OddEven },
-- { 1017, 1017, -7 },
-- { 1018, 1018, EvenOdd },
-- { 1021, 1023, -130 },
-- { 1024, 1039, 80 },
-- { 1040, 1071, 32 },
-- { 1120, 1152, EvenOddSkip },
-- { 1162, 1214, EvenOddSkip },
-- { 1216, 1216, 15 },
-- { 1217, 1229, OddEvenSkip },
-- { 1232, 1318, EvenOddSkip },
-- { 1329, 1366, 48 },
-- { 4256, 4293, 7264 },
-- { 7680, 7828, EvenOddSkip },
-- { 7835, 7835, -58 },
-- { 7838, 7838, -7615 },
-- { 7840, 7934, EvenOddSkip },
-- { 7944, 7951, -8 },
-- { 7960, 7965, -8 },
-- { 7976, 7983, -8 },
-- { 7992, 7999, -8 },
-- { 8008, 8013, -8 },
-- { 8025, 8025, -8 },
-- { 8027, 8027, -8 },
-- { 8029, 8029, -8 },
-- { 8031, 8031, -8 },
-- { 8040, 8047, -8 },
-- { 8072, 8079, -8 },
-- { 8088, 8095, -8 },
-- { 8104, 8111, -8 },
-- { 8120, 8121, -8 },
-- { 8122, 8123, -74 },
-- { 8124, 8124, -9 },
-- { 8126, 8126, -7173 },
-- { 8136, 8139, -86 },
-- { 8140, 8140, -9 },
-- { 8152, 8153, -8 },
-- { 8154, 8155, -100 },
-- { 8168, 8169, -8 },
-- { 8170, 8171, -112 },
-- { 8172, 8172, -7 },
-- { 8184, 8185, -128 },
-- { 8186, 8187, -126 },
-- { 8188, 8188, -9 },
-- { 8486, 8486, -7517 },
-- { 8490, 8490, -8383 },
-- { 8491, 8491, -8262 },
-- { 8498, 8498, 28 },
-- { 8544, 8559, 16 },
-- { 8579, 8579, OddEven },
-- { 9398, 9423, 26 },
-- { 11264, 11310, 48 },
-- { 11360, 11360, EvenOdd },
-- { 11362, 11362, -10743 },
-- { 11363, 11363, -3814 },
-- { 11364, 11364, -10727 },
-- { 11367, 11371, OddEvenSkip },
-- { 11373, 11373, -10780 },
-- { 11374, 11374, -10749 },
-- { 11375, 11375, -10783 },
-- { 11376, 11376, -10782 },
-- { 11378, 11378, EvenOdd },
-- { 11381, 11381, OddEven },
-- { 11390, 11391, -10815 },
-- { 11392, 11490, EvenOddSkip },
-- { 11499, 11501, OddEvenSkip },
-- { 42560, 42604, EvenOddSkip },
-- { 42624, 42646, EvenOddSkip },
-- { 42786, 42798, EvenOddSkip },
-- { 42802, 42862, EvenOddSkip },
-- { 42873, 42875, OddEvenSkip },
-- { 42877, 42877, -35332 },
-- { 42878, 42886, EvenOddSkip },
-- { 42891, 42891, OddEven },
-- { 42893, 42893, -42280 },
-- { 42896, 42896, EvenOdd },
-- { 42912, 42920, EvenOddSkip },
-- { 65313, 65338, 32 },
-- { 66560, 66599, 40 },
--};
--int num_unicode_tolower = 163;
--
--
--
--} // namespace re2
--
--
-diff --git a/re2/re2/unicode_casefold.h b/re2/re2/unicode_casefold.h
-deleted file mode 100644
-index 160b07e..0000000
---- a/re2/re2/unicode_casefold.h
-+++ /dev/null
-@@ -1,75 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Unicode case folding tables.
--
--// The Unicode case folding tables encode the mapping from one Unicode point
--// to the next largest Unicode point with equivalent folding. The largest
--// point wraps back to the first. For example, the tables map:
--//
--// 'A' -> 'a'
--// 'a' -> 'A'
--//
--// 'K' -> 'k'
--// 'k' -> 'K' (Kelvin symbol)
--// 'K' -> 'K'
--//
--// Like everything Unicode, these tables are big. If we represent the table
--// as a sorted list of uint32 pairs, it has 2049 entries and is 16 kB.
--// Most table entries look like the ones around them:
--// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc.
--// Instead of listing all the pairs explicitly, we make a list of ranges
--// and deltas, so that the table entries for 'A' through 'Z' can be represented
--// as a single entry { 'A', 'Z', +32 }.
--//
--// In addition to blocks that map to each other (A-Z mapping to a-z)
--// there are blocks of pairs that individually map to each other
--// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...).
--// For those, the special delta value EvenOdd marks even/odd pairs
--// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs.
--//
--// In this form, the table has 274 entries, about 3kB. If we were to split
--// the table into one for 16-bit codes and an overflow table for larger ones,
--// we could get it down to about 1.5kB, but that's not worth the complexity.
--//
--// The grouped form also allows for efficient fold range calculations
--// rather than looping one character at a time.
--
--#ifndef RE2_UNICODE_CASEFOLD_H__
--#define RE2_UNICODE_CASEFOLD_H__
--
--#include "util/util.h"
--
--namespace re2 {
--
--enum {
-- EvenOdd = 1,
-- OddEven = -1,
-- EvenOddSkip = 1<<30,
-- OddEvenSkip,
--};
--
--struct CaseFold {
-- uint32 lo;
-- uint32 hi;
-- int32 delta;
--};
--
--extern CaseFold unicode_casefold[];
--extern int num_unicode_casefold;
--
--extern CaseFold unicode_tolower[];
--extern int num_unicode_tolower;
--
--// Returns the CaseFold* in the tables that contains rune.
--// If rune is not in the tables, returns the first CaseFold* after rune.
--// If rune is larger than any value in the tables, returns NULL.
--extern CaseFold* LookupCaseFold(CaseFold*, int, Rune rune);
--
--// Returns the result of applying the fold f to the rune r.
--extern Rune ApplyFold(CaseFold *f, Rune r);
--
--} // namespace re2
--
--#endif // RE2_UNICODE_CASEFOLD_H__
-diff --git a/re2/re2/unicode_groups.cc b/re2/re2/unicode_groups.cc
-deleted file mode 100644
-index b57a327..0000000
---- a/re2/re2/unicode_groups.cc
-+++ /dev/null
-@@ -1,4851 +0,0 @@
--
--// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
--// make_unicode_groups.py >unicode_groups.cc
--
--#include "re2/unicode_groups.h"
--
--namespace re2 {
--
--
--static URange16 Ps_range16[] = {
-- { 40, 40 },
-- { 91, 91 },
-- { 123, 123 },
-- { 3898, 3898 },
-- { 3900, 3900 },
-- { 5787, 5787 },
-- { 8218, 8218 },
-- { 8222, 8222 },
-- { 8261, 8261 },
-- { 8317, 8317 },
-- { 8333, 8333 },
-- { 9001, 9001 },
-- { 10088, 10088 },
-- { 10090, 10090 },
-- { 10092, 10092 },
-- { 10094, 10094 },
-- { 10096, 10096 },
-- { 10098, 10098 },
-- { 10100, 10100 },
-- { 10181, 10181 },
-- { 10214, 10214 },
-- { 10216, 10216 },
-- { 10218, 10218 },
-- { 10220, 10220 },
-- { 10222, 10222 },
-- { 10627, 10627 },
-- { 10629, 10629 },
-- { 10631, 10631 },
-- { 10633, 10633 },
-- { 10635, 10635 },
-- { 10637, 10637 },
-- { 10639, 10639 },
-- { 10641, 10641 },
-- { 10643, 10643 },
-- { 10645, 10645 },
-- { 10647, 10647 },
-- { 10712, 10712 },
-- { 10714, 10714 },
-- { 10748, 10748 },
-- { 11810, 11810 },
-- { 11812, 11812 },
-- { 11814, 11814 },
-- { 11816, 11816 },
-- { 12296, 12296 },
-- { 12298, 12298 },
-- { 12300, 12300 },
-- { 12302, 12302 },
-- { 12304, 12304 },
-- { 12308, 12308 },
-- { 12310, 12310 },
-- { 12312, 12312 },
-- { 12314, 12314 },
-- { 12317, 12317 },
-- { 64830, 64830 },
-- { 65047, 65047 },
-- { 65077, 65077 },
-- { 65079, 65079 },
-- { 65081, 65081 },
-- { 65083, 65083 },
-- { 65085, 65085 },
-- { 65087, 65087 },
-- { 65089, 65089 },
-- { 65091, 65091 },
-- { 65095, 65095 },
-- { 65113, 65113 },
-- { 65115, 65115 },
-- { 65117, 65117 },
-- { 65288, 65288 },
-- { 65339, 65339 },
-- { 65371, 65371 },
-- { 65375, 65375 },
-- { 65378, 65378 },
--};
--static URange16 Nl_range16[] = {
-- { 5870, 5872 },
-- { 8544, 8578 },
-- { 8581, 8584 },
-- { 12295, 12295 },
-- { 12321, 12329 },
-- { 12344, 12346 },
-- { 42726, 42735 },
--};
--static URange32 Nl_range32[] = {
-- { 65856, 65908 },
-- { 66369, 66369 },
-- { 66378, 66378 },
-- { 66513, 66517 },
-- { 74752, 74850 },
--};
--static URange16 No_range16[] = {
-- { 178, 179 },
-- { 185, 185 },
-- { 188, 190 },
-- { 2548, 2553 },
-- { 2930, 2935 },
-- { 3056, 3058 },
-- { 3192, 3198 },
-- { 3440, 3445 },
-- { 3882, 3891 },
-- { 4969, 4988 },
-- { 6128, 6137 },
-- { 6618, 6618 },
-- { 8304, 8304 },
-- { 8308, 8313 },
-- { 8320, 8329 },
-- { 8528, 8543 },
-- { 8585, 8585 },
-- { 9312, 9371 },
-- { 9450, 9471 },
-- { 10102, 10131 },
-- { 11517, 11517 },
-- { 12690, 12693 },
-- { 12832, 12841 },
-- { 12881, 12895 },
-- { 12928, 12937 },
-- { 12977, 12991 },
-- { 43056, 43061 },
--};
--static URange32 No_range32[] = {
-- { 65799, 65843 },
-- { 65909, 65912 },
-- { 65930, 65930 },
-- { 66336, 66339 },
-- { 67672, 67679 },
-- { 67862, 67867 },
-- { 68160, 68167 },
-- { 68221, 68222 },
-- { 68440, 68447 },
-- { 68472, 68479 },
-- { 69216, 69246 },
-- { 69714, 69733 },
-- { 119648, 119665 },
-- { 127232, 127242 },
--};
--static URange16 Lo_range16[] = {
-- { 443, 443 },
-- { 448, 451 },
-- { 660, 660 },
-- { 1488, 1514 },
-- { 1520, 1522 },
-- { 1568, 1599 },
-- { 1601, 1610 },
-- { 1646, 1647 },
-- { 1649, 1747 },
-- { 1749, 1749 },
-- { 1774, 1775 },
-- { 1786, 1788 },
-- { 1791, 1791 },
-- { 1808, 1808 },
-- { 1810, 1839 },
-- { 1869, 1957 },
-- { 1969, 1969 },
-- { 1994, 2026 },
-- { 2048, 2069 },
-- { 2112, 2136 },
-- { 2308, 2361 },
-- { 2365, 2365 },
-- { 2384, 2384 },
-- { 2392, 2401 },
-- { 2418, 2423 },
-- { 2425, 2431 },
-- { 2437, 2444 },
-- { 2447, 2448 },
-- { 2451, 2472 },
-- { 2474, 2480 },
-- { 2482, 2482 },
-- { 2486, 2489 },
-- { 2493, 2493 },
-- { 2510, 2510 },
-- { 2524, 2525 },
-- { 2527, 2529 },
-- { 2544, 2545 },
-- { 2565, 2570 },
-- { 2575, 2576 },
-- { 2579, 2600 },
-- { 2602, 2608 },
-- { 2610, 2611 },
-- { 2613, 2614 },
-- { 2616, 2617 },
-- { 2649, 2652 },
-- { 2654, 2654 },
-- { 2674, 2676 },
-- { 2693, 2701 },
-- { 2703, 2705 },
-- { 2707, 2728 },
-- { 2730, 2736 },
-- { 2738, 2739 },
-- { 2741, 2745 },
-- { 2749, 2749 },
-- { 2768, 2768 },
-- { 2784, 2785 },
-- { 2821, 2828 },
-- { 2831, 2832 },
-- { 2835, 2856 },
-- { 2858, 2864 },
-- { 2866, 2867 },
-- { 2869, 2873 },
-- { 2877, 2877 },
-- { 2908, 2909 },
-- { 2911, 2913 },
-- { 2929, 2929 },
-- { 2947, 2947 },
-- { 2949, 2954 },
-- { 2958, 2960 },
-- { 2962, 2965 },
-- { 2969, 2970 },
-- { 2972, 2972 },
-- { 2974, 2975 },
-- { 2979, 2980 },
-- { 2984, 2986 },
-- { 2990, 3001 },
-- { 3024, 3024 },
-- { 3077, 3084 },
-- { 3086, 3088 },
-- { 3090, 3112 },
-- { 3114, 3123 },
-- { 3125, 3129 },
-- { 3133, 3133 },
-- { 3160, 3161 },
-- { 3168, 3169 },
-- { 3205, 3212 },
-- { 3214, 3216 },
-- { 3218, 3240 },
-- { 3242, 3251 },
-- { 3253, 3257 },
-- { 3261, 3261 },
-- { 3294, 3294 },
-- { 3296, 3297 },
-- { 3313, 3314 },
-- { 3333, 3340 },
-- { 3342, 3344 },
-- { 3346, 3386 },
-- { 3389, 3389 },
-- { 3406, 3406 },
-- { 3424, 3425 },
-- { 3450, 3455 },
-- { 3461, 3478 },
-- { 3482, 3505 },
-- { 3507, 3515 },
-- { 3517, 3517 },
-- { 3520, 3526 },
-- { 3585, 3632 },
-- { 3634, 3635 },
-- { 3648, 3653 },
-- { 3713, 3714 },
-- { 3716, 3716 },
-- { 3719, 3720 },
-- { 3722, 3722 },
-- { 3725, 3725 },
-- { 3732, 3735 },
-- { 3737, 3743 },
-- { 3745, 3747 },
-- { 3749, 3749 },
-- { 3751, 3751 },
-- { 3754, 3755 },
-- { 3757, 3760 },
-- { 3762, 3763 },
-- { 3773, 3773 },
-- { 3776, 3780 },
-- { 3804, 3805 },
-- { 3840, 3840 },
-- { 3904, 3911 },
-- { 3913, 3948 },
-- { 3976, 3980 },
-- { 4096, 4138 },
-- { 4159, 4159 },
-- { 4176, 4181 },
-- { 4186, 4189 },
-- { 4193, 4193 },
-- { 4197, 4198 },
-- { 4206, 4208 },
-- { 4213, 4225 },
-- { 4238, 4238 },
-- { 4304, 4346 },
-- { 4352, 4680 },
-- { 4682, 4685 },
-- { 4688, 4694 },
-- { 4696, 4696 },
-- { 4698, 4701 },
-- { 4704, 4744 },
-- { 4746, 4749 },
-- { 4752, 4784 },
-- { 4786, 4789 },
-- { 4792, 4798 },
-- { 4800, 4800 },
-- { 4802, 4805 },
-- { 4808, 4822 },
-- { 4824, 4880 },
-- { 4882, 4885 },
-- { 4888, 4954 },
-- { 4992, 5007 },
-- { 5024, 5108 },
-- { 5121, 5740 },
-- { 5743, 5759 },
-- { 5761, 5786 },
-- { 5792, 5866 },
-- { 5888, 5900 },
-- { 5902, 5905 },
-- { 5920, 5937 },
-- { 5952, 5969 },
-- { 5984, 5996 },
-- { 5998, 6000 },
-- { 6016, 6067 },
-- { 6108, 6108 },
-- { 6176, 6210 },
-- { 6212, 6263 },
-- { 6272, 6312 },
-- { 6314, 6314 },
-- { 6320, 6389 },
-- { 6400, 6428 },
-- { 6480, 6509 },
-- { 6512, 6516 },
-- { 6528, 6571 },
-- { 6593, 6599 },
-- { 6656, 6678 },
-- { 6688, 6740 },
-- { 6917, 6963 },
-- { 6981, 6987 },
-- { 7043, 7072 },
-- { 7086, 7087 },
-- { 7104, 7141 },
-- { 7168, 7203 },
-- { 7245, 7247 },
-- { 7258, 7287 },
-- { 7401, 7404 },
-- { 7406, 7409 },
-- { 8501, 8504 },
-- { 11568, 11621 },
-- { 11648, 11670 },
-- { 11680, 11686 },
-- { 11688, 11694 },
-- { 11696, 11702 },
-- { 11704, 11710 },
-- { 11712, 11718 },
-- { 11720, 11726 },
-- { 11728, 11734 },
-- { 11736, 11742 },
-- { 12294, 12294 },
-- { 12348, 12348 },
-- { 12353, 12438 },
-- { 12447, 12447 },
-- { 12449, 12538 },
-- { 12543, 12543 },
-- { 12549, 12589 },
-- { 12593, 12686 },
-- { 12704, 12730 },
-- { 12784, 12799 },
-- { 13312, 19893 },
-- { 19968, 40907 },
-- { 40960, 40980 },
-- { 40982, 42124 },
-- { 42192, 42231 },
-- { 42240, 42507 },
-- { 42512, 42527 },
-- { 42538, 42539 },
-- { 42606, 42606 },
-- { 42656, 42725 },
-- { 43003, 43009 },
-- { 43011, 43013 },
-- { 43015, 43018 },
-- { 43020, 43042 },
-- { 43072, 43123 },
-- { 43138, 43187 },
-- { 43250, 43255 },
-- { 43259, 43259 },
-- { 43274, 43301 },
-- { 43312, 43334 },
-- { 43360, 43388 },
-- { 43396, 43442 },
-- { 43520, 43560 },
-- { 43584, 43586 },
-- { 43588, 43595 },
-- { 43616, 43631 },
-- { 43633, 43638 },
-- { 43642, 43642 },
-- { 43648, 43695 },
-- { 43697, 43697 },
-- { 43701, 43702 },
-- { 43705, 43709 },
-- { 43712, 43712 },
-- { 43714, 43714 },
-- { 43739, 43740 },
-- { 43777, 43782 },
-- { 43785, 43790 },
-- { 43793, 43798 },
-- { 43808, 43814 },
-- { 43816, 43822 },
-- { 43968, 44002 },
-- { 44032, 55203 },
-- { 55216, 55238 },
-- { 55243, 55291 },
-- { 63744, 64045 },
-- { 64048, 64109 },
-- { 64112, 64217 },
-- { 64285, 64285 },
-- { 64287, 64296 },
-- { 64298, 64310 },
-- { 64312, 64316 },
-- { 64318, 64318 },
-- { 64320, 64321 },
-- { 64323, 64324 },
-- { 64326, 64433 },
-- { 64467, 64829 },
-- { 64848, 64911 },
-- { 64914, 64967 },
-- { 65008, 65019 },
-- { 65136, 65140 },
-- { 65142, 65276 },
-- { 65382, 65391 },
-- { 65393, 65437 },
-- { 65440, 65470 },
-- { 65474, 65479 },
-- { 65482, 65487 },
-- { 65490, 65495 },
-- { 65498, 65500 },
--};
--static URange32 Lo_range32[] = {
-- { 65536, 65547 },
-- { 65549, 65574 },
-- { 65576, 65594 },
-- { 65596, 65597 },
-- { 65599, 65613 },
-- { 65616, 65629 },
-- { 65664, 65786 },
-- { 66176, 66204 },
-- { 66208, 66256 },
-- { 66304, 66334 },
-- { 66352, 66368 },
-- { 66370, 66377 },
-- { 66432, 66461 },
-- { 66464, 66499 },
-- { 66504, 66511 },
-- { 66640, 66717 },
-- { 67584, 67589 },
-- { 67592, 67592 },
-- { 67594, 67637 },
-- { 67639, 67640 },
-- { 67644, 67644 },
-- { 67647, 67669 },
-- { 67840, 67861 },
-- { 67872, 67897 },
-- { 68096, 68096 },
-- { 68112, 68115 },
-- { 68117, 68119 },
-- { 68121, 68147 },
-- { 68192, 68220 },
-- { 68352, 68405 },
-- { 68416, 68437 },
-- { 68448, 68466 },
-- { 68608, 68680 },
-- { 69635, 69687 },
-- { 69763, 69807 },
-- { 73728, 74606 },
-- { 77824, 78894 },
-- { 92160, 92728 },
-- { 110592, 110593 },
-- { 131072, 173782 },
-- { 173824, 177972 },
-- { 177984, 178205 },
-- { 194560, 195101 },
--};
--static URange16 Ll_range16[] = {
-- { 97, 122 },
-- { 170, 170 },
-- { 181, 181 },
-- { 186, 186 },
-- { 223, 246 },
-- { 248, 255 },
-- { 257, 257 },
-- { 259, 259 },
-- { 261, 261 },
-- { 263, 263 },
-- { 265, 265 },
-- { 267, 267 },
-- { 269, 269 },
-- { 271, 271 },
-- { 273, 273 },
-- { 275, 275 },
-- { 277, 277 },
-- { 279, 279 },
-- { 281, 281 },
-- { 283, 283 },
-- { 285, 285 },
-- { 287, 287 },
-- { 289, 289 },
-- { 291, 291 },
-- { 293, 293 },
-- { 295, 295 },
-- { 297, 297 },
-- { 299, 299 },
-- { 301, 301 },
-- { 303, 303 },
-- { 305, 305 },
-- { 307, 307 },
-- { 309, 309 },
-- { 311, 312 },
-- { 314, 314 },
-- { 316, 316 },
-- { 318, 318 },
-- { 320, 320 },
-- { 322, 322 },
-- { 324, 324 },
-- { 326, 326 },
-- { 328, 329 },
-- { 331, 331 },
-- { 333, 333 },
-- { 335, 335 },
-- { 337, 337 },
-- { 339, 339 },
-- { 341, 341 },
-- { 343, 343 },
-- { 345, 345 },
-- { 347, 347 },
-- { 349, 349 },
-- { 351, 351 },
-- { 353, 353 },
-- { 355, 355 },
-- { 357, 357 },
-- { 359, 359 },
-- { 361, 361 },
-- { 363, 363 },
-- { 365, 365 },
-- { 367, 367 },
-- { 369, 369 },
-- { 371, 371 },
-- { 373, 373 },
-- { 375, 375 },
-- { 378, 378 },
-- { 380, 380 },
-- { 382, 384 },
-- { 387, 387 },
-- { 389, 389 },
-- { 392, 392 },
-- { 396, 397 },
-- { 402, 402 },
-- { 405, 405 },
-- { 409, 411 },
-- { 414, 414 },
-- { 417, 417 },
-- { 419, 419 },
-- { 421, 421 },
-- { 424, 424 },
-- { 426, 427 },
-- { 429, 429 },
-- { 432, 432 },
-- { 436, 436 },
-- { 438, 438 },
-- { 441, 442 },
-- { 445, 447 },
-- { 454, 454 },
-- { 457, 457 },
-- { 460, 460 },
-- { 462, 462 },
-- { 464, 464 },
-- { 466, 466 },
-- { 468, 468 },
-- { 470, 470 },
-- { 472, 472 },
-- { 474, 474 },
-- { 476, 477 },
-- { 479, 479 },
-- { 481, 481 },
-- { 483, 483 },
-- { 485, 485 },
-- { 487, 487 },
-- { 489, 489 },
-- { 491, 491 },
-- { 493, 493 },
-- { 495, 496 },
-- { 499, 499 },
-- { 501, 501 },
-- { 505, 505 },
-- { 507, 507 },
-- { 509, 509 },
-- { 511, 511 },
-- { 513, 513 },
-- { 515, 515 },
-- { 517, 517 },
-- { 519, 519 },
-- { 521, 521 },
-- { 523, 523 },
-- { 525, 525 },
-- { 527, 527 },
-- { 529, 529 },
-- { 531, 531 },
-- { 533, 533 },
-- { 535, 535 },
-- { 537, 537 },
-- { 539, 539 },
-- { 541, 541 },
-- { 543, 543 },
-- { 545, 545 },
-- { 547, 547 },
-- { 549, 549 },
-- { 551, 551 },
-- { 553, 553 },
-- { 555, 555 },
-- { 557, 557 },
-- { 559, 559 },
-- { 561, 561 },
-- { 563, 569 },
-- { 572, 572 },
-- { 575, 576 },
-- { 578, 578 },
-- { 583, 583 },
-- { 585, 585 },
-- { 587, 587 },
-- { 589, 589 },
-- { 591, 659 },
-- { 661, 687 },
-- { 881, 881 },
-- { 883, 883 },
-- { 887, 887 },
-- { 891, 893 },
-- { 912, 912 },
-- { 940, 974 },
-- { 976, 977 },
-- { 981, 983 },
-- { 985, 985 },
-- { 987, 987 },
-- { 989, 989 },
-- { 991, 991 },
-- { 993, 993 },
-- { 995, 995 },
-- { 997, 997 },
-- { 999, 999 },
-- { 1001, 1001 },
-- { 1003, 1003 },
-- { 1005, 1005 },
-- { 1007, 1011 },
-- { 1013, 1013 },
-- { 1016, 1016 },
-- { 1019, 1020 },
-- { 1072, 1119 },
-- { 1121, 1121 },
-- { 1123, 1123 },
-- { 1125, 1125 },
-- { 1127, 1127 },
-- { 1129, 1129 },
-- { 1131, 1131 },
-- { 1133, 1133 },
-- { 1135, 1135 },
-- { 1137, 1137 },
-- { 1139, 1139 },
-- { 1141, 1141 },
-- { 1143, 1143 },
-- { 1145, 1145 },
-- { 1147, 1147 },
-- { 1149, 1149 },
-- { 1151, 1151 },
-- { 1153, 1153 },
-- { 1163, 1163 },
-- { 1165, 1165 },
-- { 1167, 1167 },
-- { 1169, 1169 },
-- { 1171, 1171 },
-- { 1173, 1173 },
-- { 1175, 1175 },
-- { 1177, 1177 },
-- { 1179, 1179 },
-- { 1181, 1181 },
-- { 1183, 1183 },
-- { 1185, 1185 },
-- { 1187, 1187 },
-- { 1189, 1189 },
-- { 1191, 1191 },
-- { 1193, 1193 },
-- { 1195, 1195 },
-- { 1197, 1197 },
-- { 1199, 1199 },
-- { 1201, 1201 },
-- { 1203, 1203 },
-- { 1205, 1205 },
-- { 1207, 1207 },
-- { 1209, 1209 },
-- { 1211, 1211 },
-- { 1213, 1213 },
-- { 1215, 1215 },
-- { 1218, 1218 },
-- { 1220, 1220 },
-- { 1222, 1222 },
-- { 1224, 1224 },
-- { 1226, 1226 },
-- { 1228, 1228 },
-- { 1230, 1231 },
-- { 1233, 1233 },
-- { 1235, 1235 },
-- { 1237, 1237 },
-- { 1239, 1239 },
-- { 1241, 1241 },
-- { 1243, 1243 },
-- { 1245, 1245 },
-- { 1247, 1247 },
-- { 1249, 1249 },
-- { 1251, 1251 },
-- { 1253, 1253 },
-- { 1255, 1255 },
-- { 1257, 1257 },
-- { 1259, 1259 },
-- { 1261, 1261 },
-- { 1263, 1263 },
-- { 1265, 1265 },
-- { 1267, 1267 },
-- { 1269, 1269 },
-- { 1271, 1271 },
-- { 1273, 1273 },
-- { 1275, 1275 },
-- { 1277, 1277 },
-- { 1279, 1279 },
-- { 1281, 1281 },
-- { 1283, 1283 },
-- { 1285, 1285 },
-- { 1287, 1287 },
-- { 1289, 1289 },
-- { 1291, 1291 },
-- { 1293, 1293 },
-- { 1295, 1295 },
-- { 1297, 1297 },
-- { 1299, 1299 },
-- { 1301, 1301 },
-- { 1303, 1303 },
-- { 1305, 1305 },
-- { 1307, 1307 },
-- { 1309, 1309 },
-- { 1311, 1311 },
-- { 1313, 1313 },
-- { 1315, 1315 },
-- { 1317, 1317 },
-- { 1319, 1319 },
-- { 1377, 1415 },
-- { 7424, 7467 },
-- { 7522, 7543 },
-- { 7545, 7578 },
-- { 7681, 7681 },
-- { 7683, 7683 },
-- { 7685, 7685 },
-- { 7687, 7687 },
-- { 7689, 7689 },
-- { 7691, 7691 },
-- { 7693, 7693 },
-- { 7695, 7695 },
-- { 7697, 7697 },
-- { 7699, 7699 },
-- { 7701, 7701 },
-- { 7703, 7703 },
-- { 7705, 7705 },
-- { 7707, 7707 },
-- { 7709, 7709 },
-- { 7711, 7711 },
-- { 7713, 7713 },
-- { 7715, 7715 },
-- { 7717, 7717 },
-- { 7719, 7719 },
-- { 7721, 7721 },
-- { 7723, 7723 },
-- { 7725, 7725 },
-- { 7727, 7727 },
-- { 7729, 7729 },
-- { 7731, 7731 },
-- { 7733, 7733 },
-- { 7735, 7735 },
-- { 7737, 7737 },
-- { 7739, 7739 },
-- { 7741, 7741 },
-- { 7743, 7743 },
-- { 7745, 7745 },
-- { 7747, 7747 },
-- { 7749, 7749 },
-- { 7751, 7751 },
-- { 7753, 7753 },
-- { 7755, 7755 },
-- { 7757, 7757 },
-- { 7759, 7759 },
-- { 7761, 7761 },
-- { 7763, 7763 },
-- { 7765, 7765 },
-- { 7767, 7767 },
-- { 7769, 7769 },
-- { 7771, 7771 },
-- { 7773, 7773 },
-- { 7775, 7775 },
-- { 7777, 7777 },
-- { 7779, 7779 },
-- { 7781, 7781 },
-- { 7783, 7783 },
-- { 7785, 7785 },
-- { 7787, 7787 },
-- { 7789, 7789 },
-- { 7791, 7791 },
-- { 7793, 7793 },
-- { 7795, 7795 },
-- { 7797, 7797 },
-- { 7799, 7799 },
-- { 7801, 7801 },
-- { 7803, 7803 },
-- { 7805, 7805 },
-- { 7807, 7807 },
-- { 7809, 7809 },
-- { 7811, 7811 },
-- { 7813, 7813 },
-- { 7815, 7815 },
-- { 7817, 7817 },
-- { 7819, 7819 },
-- { 7821, 7821 },
-- { 7823, 7823 },
-- { 7825, 7825 },
-- { 7827, 7827 },
-- { 7829, 7837 },
-- { 7839, 7839 },
-- { 7841, 7841 },
-- { 7843, 7843 },
-- { 7845, 7845 },
-- { 7847, 7847 },
-- { 7849, 7849 },
-- { 7851, 7851 },
-- { 7853, 7853 },
-- { 7855, 7855 },
-- { 7857, 7857 },
-- { 7859, 7859 },
-- { 7861, 7861 },
-- { 7863, 7863 },
-- { 7865, 7865 },
-- { 7867, 7867 },
-- { 7869, 7869 },
-- { 7871, 7871 },
-- { 7873, 7873 },
-- { 7875, 7875 },
-- { 7877, 7877 },
-- { 7879, 7879 },
-- { 7881, 7881 },
-- { 7883, 7883 },
-- { 7885, 7885 },
-- { 7887, 7887 },
-- { 7889, 7889 },
-- { 7891, 7891 },
-- { 7893, 7893 },
-- { 7895, 7895 },
-- { 7897, 7897 },
-- { 7899, 7899 },
-- { 7901, 7901 },
-- { 7903, 7903 },
-- { 7905, 7905 },
-- { 7907, 7907 },
-- { 7909, 7909 },
-- { 7911, 7911 },
-- { 7913, 7913 },
-- { 7915, 7915 },
-- { 7917, 7917 },
-- { 7919, 7919 },
-- { 7921, 7921 },
-- { 7923, 7923 },
-- { 7925, 7925 },
-- { 7927, 7927 },
-- { 7929, 7929 },
-- { 7931, 7931 },
-- { 7933, 7933 },
-- { 7935, 7943 },
-- { 7952, 7957 },
-- { 7968, 7975 },
-- { 7984, 7991 },
-- { 8000, 8005 },
-- { 8016, 8023 },
-- { 8032, 8039 },
-- { 8048, 8061 },
-- { 8064, 8071 },
-- { 8080, 8087 },
-- { 8096, 8103 },
-- { 8112, 8116 },
-- { 8118, 8119 },
-- { 8126, 8126 },
-- { 8130, 8132 },
-- { 8134, 8135 },
-- { 8144, 8147 },
-- { 8150, 8151 },
-- { 8160, 8167 },
-- { 8178, 8180 },
-- { 8182, 8183 },
-- { 8458, 8458 },
-- { 8462, 8463 },
-- { 8467, 8467 },
-- { 8495, 8495 },
-- { 8500, 8500 },
-- { 8505, 8505 },
-- { 8508, 8509 },
-- { 8518, 8521 },
-- { 8526, 8526 },
-- { 8580, 8580 },
-- { 11312, 11358 },
-- { 11361, 11361 },
-- { 11365, 11366 },
-- { 11368, 11368 },
-- { 11370, 11370 },
-- { 11372, 11372 },
-- { 11377, 11377 },
-- { 11379, 11380 },
-- { 11382, 11388 },
-- { 11393, 11393 },
-- { 11395, 11395 },
-- { 11397, 11397 },
-- { 11399, 11399 },
-- { 11401, 11401 },
-- { 11403, 11403 },
-- { 11405, 11405 },
-- { 11407, 11407 },
-- { 11409, 11409 },
-- { 11411, 11411 },
-- { 11413, 11413 },
-- { 11415, 11415 },
-- { 11417, 11417 },
-- { 11419, 11419 },
-- { 11421, 11421 },
-- { 11423, 11423 },
-- { 11425, 11425 },
-- { 11427, 11427 },
-- { 11429, 11429 },
-- { 11431, 11431 },
-- { 11433, 11433 },
-- { 11435, 11435 },
-- { 11437, 11437 },
-- { 11439, 11439 },
-- { 11441, 11441 },
-- { 11443, 11443 },
-- { 11445, 11445 },
-- { 11447, 11447 },
-- { 11449, 11449 },
-- { 11451, 11451 },
-- { 11453, 11453 },
-- { 11455, 11455 },
-- { 11457, 11457 },
-- { 11459, 11459 },
-- { 11461, 11461 },
-- { 11463, 11463 },
-- { 11465, 11465 },
-- { 11467, 11467 },
-- { 11469, 11469 },
-- { 11471, 11471 },
-- { 11473, 11473 },
-- { 11475, 11475 },
-- { 11477, 11477 },
-- { 11479, 11479 },
-- { 11481, 11481 },
-- { 11483, 11483 },
-- { 11485, 11485 },
-- { 11487, 11487 },
-- { 11489, 11489 },
-- { 11491, 11492 },
-- { 11500, 11500 },
-- { 11502, 11502 },
-- { 11520, 11557 },
-- { 42561, 42561 },
-- { 42563, 42563 },
-- { 42565, 42565 },
-- { 42567, 42567 },
-- { 42569, 42569 },
-- { 42571, 42571 },
-- { 42573, 42573 },
-- { 42575, 42575 },
-- { 42577, 42577 },
-- { 42579, 42579 },
-- { 42581, 42581 },
-- { 42583, 42583 },
-- { 42585, 42585 },
-- { 42587, 42587 },
-- { 42589, 42589 },
-- { 42591, 42591 },
-- { 42593, 42593 },
-- { 42595, 42595 },
-- { 42597, 42597 },
-- { 42599, 42599 },
-- { 42601, 42601 },
-- { 42603, 42603 },
-- { 42605, 42605 },
-- { 42625, 42625 },
-- { 42627, 42627 },
-- { 42629, 42629 },
-- { 42631, 42631 },
-- { 42633, 42633 },
-- { 42635, 42635 },
-- { 42637, 42637 },
-- { 42639, 42639 },
-- { 42641, 42641 },
-- { 42643, 42643 },
-- { 42645, 42645 },
-- { 42647, 42647 },
-- { 42787, 42787 },
-- { 42789, 42789 },
-- { 42791, 42791 },
-- { 42793, 42793 },
-- { 42795, 42795 },
-- { 42797, 42797 },
-- { 42799, 42801 },
-- { 42803, 42803 },
-- { 42805, 42805 },
-- { 42807, 42807 },
-- { 42809, 42809 },
-- { 42811, 42811 },
-- { 42813, 42813 },
-- { 42815, 42815 },
-- { 42817, 42817 },
-- { 42819, 42819 },
-- { 42821, 42821 },
-- { 42823, 42823 },
-- { 42825, 42825 },
-- { 42827, 42827 },
-- { 42829, 42829 },
-- { 42831, 42831 },
-- { 42833, 42833 },
-- { 42835, 42835 },
-- { 42837, 42837 },
-- { 42839, 42839 },
-- { 42841, 42841 },
-- { 42843, 42843 },
-- { 42845, 42845 },
-- { 42847, 42847 },
-- { 42849, 42849 },
-- { 42851, 42851 },
-- { 42853, 42853 },
-- { 42855, 42855 },
-- { 42857, 42857 },
-- { 42859, 42859 },
-- { 42861, 42861 },
-- { 42863, 42863 },
-- { 42865, 42872 },
-- { 42874, 42874 },
-- { 42876, 42876 },
-- { 42879, 42879 },
-- { 42881, 42881 },
-- { 42883, 42883 },
-- { 42885, 42885 },
-- { 42887, 42887 },
-- { 42892, 42892 },
-- { 42894, 42894 },
-- { 42897, 42897 },
-- { 42913, 42913 },
-- { 42915, 42915 },
-- { 42917, 42917 },
-- { 42919, 42919 },
-- { 42921, 42921 },
-- { 43002, 43002 },
-- { 64256, 64262 },
-- { 64275, 64279 },
-- { 65345, 65370 },
--};
--static URange32 Ll_range32[] = {
-- { 66600, 66639 },
-- { 119834, 119859 },
-- { 119886, 119892 },
-- { 119894, 119911 },
-- { 119938, 119963 },
-- { 119990, 119993 },
-- { 119995, 119995 },
-- { 119997, 120003 },
-- { 120005, 120015 },
-- { 120042, 120067 },
-- { 120094, 120119 },
-- { 120146, 120171 },
-- { 120198, 120223 },
-- { 120250, 120275 },
-- { 120302, 120327 },
-- { 120354, 120379 },
-- { 120406, 120431 },
-- { 120458, 120485 },
-- { 120514, 120538 },
-- { 120540, 120545 },
-- { 120572, 120596 },
-- { 120598, 120603 },
-- { 120630, 120654 },
-- { 120656, 120661 },
-- { 120688, 120712 },
-- { 120714, 120719 },
-- { 120746, 120770 },
-- { 120772, 120777 },
-- { 120779, 120779 },
--};
--static URange16 Lm_range16[] = {
-- { 688, 705 },
-- { 710, 721 },
-- { 736, 740 },
-- { 748, 748 },
-- { 750, 750 },
-- { 884, 884 },
-- { 890, 890 },
-- { 1369, 1369 },
-- { 1600, 1600 },
-- { 1765, 1766 },
-- { 2036, 2037 },
-- { 2042, 2042 },
-- { 2074, 2074 },
-- { 2084, 2084 },
-- { 2088, 2088 },
-- { 2417, 2417 },
-- { 3654, 3654 },
-- { 3782, 3782 },
-- { 4348, 4348 },
-- { 6103, 6103 },
-- { 6211, 6211 },
-- { 6823, 6823 },
-- { 7288, 7293 },
-- { 7468, 7521 },
-- { 7544, 7544 },
-- { 7579, 7615 },
-- { 8305, 8305 },
-- { 8319, 8319 },
-- { 8336, 8348 },
-- { 11389, 11389 },
-- { 11631, 11631 },
-- { 11823, 11823 },
-- { 12293, 12293 },
-- { 12337, 12341 },
-- { 12347, 12347 },
-- { 12445, 12446 },
-- { 12540, 12542 },
-- { 40981, 40981 },
-- { 42232, 42237 },
-- { 42508, 42508 },
-- { 42623, 42623 },
-- { 42775, 42783 },
-- { 42864, 42864 },
-- { 42888, 42888 },
-- { 43471, 43471 },
-- { 43632, 43632 },
-- { 43741, 43741 },
-- { 65392, 65392 },
-- { 65438, 65439 },
--};
--static URange16 Nd_range16[] = {
-- { 48, 57 },
-- { 1632, 1641 },
-- { 1776, 1785 },
-- { 1984, 1993 },
-- { 2406, 2415 },
-- { 2534, 2543 },
-- { 2662, 2671 },
-- { 2790, 2799 },
-- { 2918, 2927 },
-- { 3046, 3055 },
-- { 3174, 3183 },
-- { 3302, 3311 },
-- { 3430, 3439 },
-- { 3664, 3673 },
-- { 3792, 3801 },
-- { 3872, 3881 },
-- { 4160, 4169 },
-- { 4240, 4249 },
-- { 6112, 6121 },
-- { 6160, 6169 },
-- { 6470, 6479 },
-- { 6608, 6617 },
-- { 6784, 6793 },
-- { 6800, 6809 },
-- { 6992, 7001 },
-- { 7088, 7097 },
-- { 7232, 7241 },
-- { 7248, 7257 },
-- { 42528, 42537 },
-- { 43216, 43225 },
-- { 43264, 43273 },
-- { 43472, 43481 },
-- { 43600, 43609 },
-- { 44016, 44025 },
-- { 65296, 65305 },
--};
--static URange32 Nd_range32[] = {
-- { 66720, 66729 },
-- { 69734, 69743 },
-- { 120782, 120831 },
--};
--static URange16 Pc_range16[] = {
-- { 95, 95 },
-- { 8255, 8256 },
-- { 8276, 8276 },
-- { 65075, 65076 },
-- { 65101, 65103 },
-- { 65343, 65343 },
--};
--static URange16 Lt_range16[] = {
-- { 453, 453 },
-- { 456, 456 },
-- { 459, 459 },
-- { 498, 498 },
-- { 8072, 8079 },
-- { 8088, 8095 },
-- { 8104, 8111 },
-- { 8124, 8124 },
-- { 8140, 8140 },
-- { 8188, 8188 },
--};
--static URange16 Lu_range16[] = {
-- { 65, 90 },
-- { 192, 214 },
-- { 216, 222 },
-- { 256, 256 },
-- { 258, 258 },
-- { 260, 260 },
-- { 262, 262 },
-- { 264, 264 },
-- { 266, 266 },
-- { 268, 268 },
-- { 270, 270 },
-- { 272, 272 },
-- { 274, 274 },
-- { 276, 276 },
-- { 278, 278 },
-- { 280, 280 },
-- { 282, 282 },
-- { 284, 284 },
-- { 286, 286 },
-- { 288, 288 },
-- { 290, 290 },
-- { 292, 292 },
-- { 294, 294 },
-- { 296, 296 },
-- { 298, 298 },
-- { 300, 300 },
-- { 302, 302 },
-- { 304, 304 },
-- { 306, 306 },
-- { 308, 308 },
-- { 310, 310 },
-- { 313, 313 },
-- { 315, 315 },
-- { 317, 317 },
-- { 319, 319 },
-- { 321, 321 },
-- { 323, 323 },
-- { 325, 325 },
-- { 327, 327 },
-- { 330, 330 },
-- { 332, 332 },
-- { 334, 334 },
-- { 336, 336 },
-- { 338, 338 },
-- { 340, 340 },
-- { 342, 342 },
-- { 344, 344 },
-- { 346, 346 },
-- { 348, 348 },
-- { 350, 350 },
-- { 352, 352 },
-- { 354, 354 },
-- { 356, 356 },
-- { 358, 358 },
-- { 360, 360 },
-- { 362, 362 },
-- { 364, 364 },
-- { 366, 366 },
-- { 368, 368 },
-- { 370, 370 },
-- { 372, 372 },
-- { 374, 374 },
-- { 376, 377 },
-- { 379, 379 },
-- { 381, 381 },
-- { 385, 386 },
-- { 388, 388 },
-- { 390, 391 },
-- { 393, 395 },
-- { 398, 401 },
-- { 403, 404 },
-- { 406, 408 },
-- { 412, 413 },
-- { 415, 416 },
-- { 418, 418 },
-- { 420, 420 },
-- { 422, 423 },
-- { 425, 425 },
-- { 428, 428 },
-- { 430, 431 },
-- { 433, 435 },
-- { 437, 437 },
-- { 439, 440 },
-- { 444, 444 },
-- { 452, 452 },
-- { 455, 455 },
-- { 458, 458 },
-- { 461, 461 },
-- { 463, 463 },
-- { 465, 465 },
-- { 467, 467 },
-- { 469, 469 },
-- { 471, 471 },
-- { 473, 473 },
-- { 475, 475 },
-- { 478, 478 },
-- { 480, 480 },
-- { 482, 482 },
-- { 484, 484 },
-- { 486, 486 },
-- { 488, 488 },
-- { 490, 490 },
-- { 492, 492 },
-- { 494, 494 },
-- { 497, 497 },
-- { 500, 500 },
-- { 502, 504 },
-- { 506, 506 },
-- { 508, 508 },
-- { 510, 510 },
-- { 512, 512 },
-- { 514, 514 },
-- { 516, 516 },
-- { 518, 518 },
-- { 520, 520 },
-- { 522, 522 },
-- { 524, 524 },
-- { 526, 526 },
-- { 528, 528 },
-- { 530, 530 },
-- { 532, 532 },
-- { 534, 534 },
-- { 536, 536 },
-- { 538, 538 },
-- { 540, 540 },
-- { 542, 542 },
-- { 544, 544 },
-- { 546, 546 },
-- { 548, 548 },
-- { 550, 550 },
-- { 552, 552 },
-- { 554, 554 },
-- { 556, 556 },
-- { 558, 558 },
-- { 560, 560 },
-- { 562, 562 },
-- { 570, 571 },
-- { 573, 574 },
-- { 577, 577 },
-- { 579, 582 },
-- { 584, 584 },
-- { 586, 586 },
-- { 588, 588 },
-- { 590, 590 },
-- { 880, 880 },
-- { 882, 882 },
-- { 886, 886 },
-- { 902, 902 },
-- { 904, 906 },
-- { 908, 908 },
-- { 910, 911 },
-- { 913, 929 },
-- { 931, 939 },
-- { 975, 975 },
-- { 978, 980 },
-- { 984, 984 },
-- { 986, 986 },
-- { 988, 988 },
-- { 990, 990 },
-- { 992, 992 },
-- { 994, 994 },
-- { 996, 996 },
-- { 998, 998 },
-- { 1000, 1000 },
-- { 1002, 1002 },
-- { 1004, 1004 },
-- { 1006, 1006 },
-- { 1012, 1012 },
-- { 1015, 1015 },
-- { 1017, 1018 },
-- { 1021, 1071 },
-- { 1120, 1120 },
-- { 1122, 1122 },
-- { 1124, 1124 },
-- { 1126, 1126 },
-- { 1128, 1128 },
-- { 1130, 1130 },
-- { 1132, 1132 },
-- { 1134, 1134 },
-- { 1136, 1136 },
-- { 1138, 1138 },
-- { 1140, 1140 },
-- { 1142, 1142 },
-- { 1144, 1144 },
-- { 1146, 1146 },
-- { 1148, 1148 },
-- { 1150, 1150 },
-- { 1152, 1152 },
-- { 1162, 1162 },
-- { 1164, 1164 },
-- { 1166, 1166 },
-- { 1168, 1168 },
-- { 1170, 1170 },
-- { 1172, 1172 },
-- { 1174, 1174 },
-- { 1176, 1176 },
-- { 1178, 1178 },
-- { 1180, 1180 },
-- { 1182, 1182 },
-- { 1184, 1184 },
-- { 1186, 1186 },
-- { 1188, 1188 },
-- { 1190, 1190 },
-- { 1192, 1192 },
-- { 1194, 1194 },
-- { 1196, 1196 },
-- { 1198, 1198 },
-- { 1200, 1200 },
-- { 1202, 1202 },
-- { 1204, 1204 },
-- { 1206, 1206 },
-- { 1208, 1208 },
-- { 1210, 1210 },
-- { 1212, 1212 },
-- { 1214, 1214 },
-- { 1216, 1217 },
-- { 1219, 1219 },
-- { 1221, 1221 },
-- { 1223, 1223 },
-- { 1225, 1225 },
-- { 1227, 1227 },
-- { 1229, 1229 },
-- { 1232, 1232 },
-- { 1234, 1234 },
-- { 1236, 1236 },
-- { 1238, 1238 },
-- { 1240, 1240 },
-- { 1242, 1242 },
-- { 1244, 1244 },
-- { 1246, 1246 },
-- { 1248, 1248 },
-- { 1250, 1250 },
-- { 1252, 1252 },
-- { 1254, 1254 },
-- { 1256, 1256 },
-- { 1258, 1258 },
-- { 1260, 1260 },
-- { 1262, 1262 },
-- { 1264, 1264 },
-- { 1266, 1266 },
-- { 1268, 1268 },
-- { 1270, 1270 },
-- { 1272, 1272 },
-- { 1274, 1274 },
-- { 1276, 1276 },
-- { 1278, 1278 },
-- { 1280, 1280 },
-- { 1282, 1282 },
-- { 1284, 1284 },
-- { 1286, 1286 },
-- { 1288, 1288 },
-- { 1290, 1290 },
-- { 1292, 1292 },
-- { 1294, 1294 },
-- { 1296, 1296 },
-- { 1298, 1298 },
-- { 1300, 1300 },
-- { 1302, 1302 },
-- { 1304, 1304 },
-- { 1306, 1306 },
-- { 1308, 1308 },
-- { 1310, 1310 },
-- { 1312, 1312 },
-- { 1314, 1314 },
-- { 1316, 1316 },
-- { 1318, 1318 },
-- { 1329, 1366 },
-- { 4256, 4293 },
-- { 7680, 7680 },
-- { 7682, 7682 },
-- { 7684, 7684 },
-- { 7686, 7686 },
-- { 7688, 7688 },
-- { 7690, 7690 },
-- { 7692, 7692 },
-- { 7694, 7694 },
-- { 7696, 7696 },
-- { 7698, 7698 },
-- { 7700, 7700 },
-- { 7702, 7702 },
-- { 7704, 7704 },
-- { 7706, 7706 },
-- { 7708, 7708 },
-- { 7710, 7710 },
-- { 7712, 7712 },
-- { 7714, 7714 },
-- { 7716, 7716 },
-- { 7718, 7718 },
-- { 7720, 7720 },
-- { 7722, 7722 },
-- { 7724, 7724 },
-- { 7726, 7726 },
-- { 7728, 7728 },
-- { 7730, 7730 },
-- { 7732, 7732 },
-- { 7734, 7734 },
-- { 7736, 7736 },
-- { 7738, 7738 },
-- { 7740, 7740 },
-- { 7742, 7742 },
-- { 7744, 7744 },
-- { 7746, 7746 },
-- { 7748, 7748 },
-- { 7750, 7750 },
-- { 7752, 7752 },
-- { 7754, 7754 },
-- { 7756, 7756 },
-- { 7758, 7758 },
-- { 7760, 7760 },
-- { 7762, 7762 },
-- { 7764, 7764 },
-- { 7766, 7766 },
-- { 7768, 7768 },
-- { 7770, 7770 },
-- { 7772, 7772 },
-- { 7774, 7774 },
-- { 7776, 7776 },
-- { 7778, 7778 },
-- { 7780, 7780 },
-- { 7782, 7782 },
-- { 7784, 7784 },
-- { 7786, 7786 },
-- { 7788, 7788 },
-- { 7790, 7790 },
-- { 7792, 7792 },
-- { 7794, 7794 },
-- { 7796, 7796 },
-- { 7798, 7798 },
-- { 7800, 7800 },
-- { 7802, 7802 },
-- { 7804, 7804 },
-- { 7806, 7806 },
-- { 7808, 7808 },
-- { 7810, 7810 },
-- { 7812, 7812 },
-- { 7814, 7814 },
-- { 7816, 7816 },
-- { 7818, 7818 },
-- { 7820, 7820 },
-- { 7822, 7822 },
-- { 7824, 7824 },
-- { 7826, 7826 },
-- { 7828, 7828 },
-- { 7838, 7838 },
-- { 7840, 7840 },
-- { 7842, 7842 },
-- { 7844, 7844 },
-- { 7846, 7846 },
-- { 7848, 7848 },
-- { 7850, 7850 },
-- { 7852, 7852 },
-- { 7854, 7854 },
-- { 7856, 7856 },
-- { 7858, 7858 },
-- { 7860, 7860 },
-- { 7862, 7862 },
-- { 7864, 7864 },
-- { 7866, 7866 },
-- { 7868, 7868 },
-- { 7870, 7870 },
-- { 7872, 7872 },
-- { 7874, 7874 },
-- { 7876, 7876 },
-- { 7878, 7878 },
-- { 7880, 7880 },
-- { 7882, 7882 },
-- { 7884, 7884 },
-- { 7886, 7886 },
-- { 7888, 7888 },
-- { 7890, 7890 },
-- { 7892, 7892 },
-- { 7894, 7894 },
-- { 7896, 7896 },
-- { 7898, 7898 },
-- { 7900, 7900 },
-- { 7902, 7902 },
-- { 7904, 7904 },
-- { 7906, 7906 },
-- { 7908, 7908 },
-- { 7910, 7910 },
-- { 7912, 7912 },
-- { 7914, 7914 },
-- { 7916, 7916 },
-- { 7918, 7918 },
-- { 7920, 7920 },
-- { 7922, 7922 },
-- { 7924, 7924 },
-- { 7926, 7926 },
-- { 7928, 7928 },
-- { 7930, 7930 },
-- { 7932, 7932 },
-- { 7934, 7934 },
-- { 7944, 7951 },
-- { 7960, 7965 },
-- { 7976, 7983 },
-- { 7992, 7999 },
-- { 8008, 8013 },
-- { 8025, 8025 },
-- { 8027, 8027 },
-- { 8029, 8029 },
-- { 8031, 8031 },
-- { 8040, 8047 },
-- { 8120, 8123 },
-- { 8136, 8139 },
-- { 8152, 8155 },
-- { 8168, 8172 },
-- { 8184, 8187 },
-- { 8450, 8450 },
-- { 8455, 8455 },
-- { 8459, 8461 },
-- { 8464, 8466 },
-- { 8469, 8469 },
-- { 8473, 8477 },
-- { 8484, 8484 },
-- { 8486, 8486 },
-- { 8488, 8488 },
-- { 8490, 8493 },
-- { 8496, 8499 },
-- { 8510, 8511 },
-- { 8517, 8517 },
-- { 8579, 8579 },
-- { 11264, 11310 },
-- { 11360, 11360 },
-- { 11362, 11364 },
-- { 11367, 11367 },
-- { 11369, 11369 },
-- { 11371, 11371 },
-- { 11373, 11376 },
-- { 11378, 11378 },
-- { 11381, 11381 },
-- { 11390, 11392 },
-- { 11394, 11394 },
-- { 11396, 11396 },
-- { 11398, 11398 },
-- { 11400, 11400 },
-- { 11402, 11402 },
-- { 11404, 11404 },
-- { 11406, 11406 },
-- { 11408, 11408 },
-- { 11410, 11410 },
-- { 11412, 11412 },
-- { 11414, 11414 },
-- { 11416, 11416 },
-- { 11418, 11418 },
-- { 11420, 11420 },
-- { 11422, 11422 },
-- { 11424, 11424 },
-- { 11426, 11426 },
-- { 11428, 11428 },
-- { 11430, 11430 },
-- { 11432, 11432 },
-- { 11434, 11434 },
-- { 11436, 11436 },
-- { 11438, 11438 },
-- { 11440, 11440 },
-- { 11442, 11442 },
-- { 11444, 11444 },
-- { 11446, 11446 },
-- { 11448, 11448 },
-- { 11450, 11450 },
-- { 11452, 11452 },
-- { 11454, 11454 },
-- { 11456, 11456 },
-- { 11458, 11458 },
-- { 11460, 11460 },
-- { 11462, 11462 },
-- { 11464, 11464 },
-- { 11466, 11466 },
-- { 11468, 11468 },
-- { 11470, 11470 },
-- { 11472, 11472 },
-- { 11474, 11474 },
-- { 11476, 11476 },
-- { 11478, 11478 },
-- { 11480, 11480 },
-- { 11482, 11482 },
-- { 11484, 11484 },
-- { 11486, 11486 },
-- { 11488, 11488 },
-- { 11490, 11490 },
-- { 11499, 11499 },
-- { 11501, 11501 },
-- { 42560, 42560 },
-- { 42562, 42562 },
-- { 42564, 42564 },
-- { 42566, 42566 },
-- { 42568, 42568 },
-- { 42570, 42570 },
-- { 42572, 42572 },
-- { 42574, 42574 },
-- { 42576, 42576 },
-- { 42578, 42578 },
-- { 42580, 42580 },
-- { 42582, 42582 },
-- { 42584, 42584 },
-- { 42586, 42586 },
-- { 42588, 42588 },
-- { 42590, 42590 },
-- { 42592, 42592 },
-- { 42594, 42594 },
-- { 42596, 42596 },
-- { 42598, 42598 },
-- { 42600, 42600 },
-- { 42602, 42602 },
-- { 42604, 42604 },
-- { 42624, 42624 },
-- { 42626, 42626 },
-- { 42628, 42628 },
-- { 42630, 42630 },
-- { 42632, 42632 },
-- { 42634, 42634 },
-- { 42636, 42636 },
-- { 42638, 42638 },
-- { 42640, 42640 },
-- { 42642, 42642 },
-- { 42644, 42644 },
-- { 42646, 42646 },
-- { 42786, 42786 },
-- { 42788, 42788 },
-- { 42790, 42790 },
-- { 42792, 42792 },
-- { 42794, 42794 },
-- { 42796, 42796 },
-- { 42798, 42798 },
-- { 42802, 42802 },
-- { 42804, 42804 },
-- { 42806, 42806 },
-- { 42808, 42808 },
-- { 42810, 42810 },
-- { 42812, 42812 },
-- { 42814, 42814 },
-- { 42816, 42816 },
-- { 42818, 42818 },
-- { 42820, 42820 },
-- { 42822, 42822 },
-- { 42824, 42824 },
-- { 42826, 42826 },
-- { 42828, 42828 },
-- { 42830, 42830 },
-- { 42832, 42832 },
-- { 42834, 42834 },
-- { 42836, 42836 },
-- { 42838, 42838 },
-- { 42840, 42840 },
-- { 42842, 42842 },
-- { 42844, 42844 },
-- { 42846, 42846 },
-- { 42848, 42848 },
-- { 42850, 42850 },
-- { 42852, 42852 },
-- { 42854, 42854 },
-- { 42856, 42856 },
-- { 42858, 42858 },
-- { 42860, 42860 },
-- { 42862, 42862 },
-- { 42873, 42873 },
-- { 42875, 42875 },
-- { 42877, 42878 },
-- { 42880, 42880 },
-- { 42882, 42882 },
-- { 42884, 42884 },
-- { 42886, 42886 },
-- { 42891, 42891 },
-- { 42893, 42893 },
-- { 42896, 42896 },
-- { 42912, 42912 },
-- { 42914, 42914 },
-- { 42916, 42916 },
-- { 42918, 42918 },
-- { 42920, 42920 },
-- { 65313, 65338 },
--};
--static URange32 Lu_range32[] = {
-- { 66560, 66599 },
-- { 119808, 119833 },
-- { 119860, 119885 },
-- { 119912, 119937 },
-- { 119964, 119964 },
-- { 119966, 119967 },
-- { 119970, 119970 },
-- { 119973, 119974 },
-- { 119977, 119980 },
-- { 119982, 119989 },
-- { 120016, 120041 },
-- { 120068, 120069 },
-- { 120071, 120074 },
-- { 120077, 120084 },
-- { 120086, 120092 },
-- { 120120, 120121 },
-- { 120123, 120126 },
-- { 120128, 120132 },
-- { 120134, 120134 },
-- { 120138, 120144 },
-- { 120172, 120197 },
-- { 120224, 120249 },
-- { 120276, 120301 },
-- { 120328, 120353 },
-- { 120380, 120405 },
-- { 120432, 120457 },
-- { 120488, 120512 },
-- { 120546, 120570 },
-- { 120604, 120628 },
-- { 120662, 120686 },
-- { 120720, 120744 },
-- { 120778, 120778 },
--};
--static URange16 Pf_range16[] = {
-- { 187, 187 },
-- { 8217, 8217 },
-- { 8221, 8221 },
-- { 8250, 8250 },
-- { 11779, 11779 },
-- { 11781, 11781 },
-- { 11786, 11786 },
-- { 11789, 11789 },
-- { 11805, 11805 },
-- { 11809, 11809 },
--};
--static URange16 Pd_range16[] = {
-- { 45, 45 },
-- { 1418, 1418 },
-- { 1470, 1470 },
-- { 5120, 5120 },
-- { 6150, 6150 },
-- { 8208, 8213 },
-- { 11799, 11799 },
-- { 11802, 11802 },
-- { 12316, 12316 },
-- { 12336, 12336 },
-- { 12448, 12448 },
-- { 65073, 65074 },
-- { 65112, 65112 },
-- { 65123, 65123 },
-- { 65293, 65293 },
--};
--static URange16 Pe_range16[] = {
-- { 41, 41 },
-- { 93, 93 },
-- { 125, 125 },
-- { 3899, 3899 },
-- { 3901, 3901 },
-- { 5788, 5788 },
-- { 8262, 8262 },
-- { 8318, 8318 },
-- { 8334, 8334 },
-- { 9002, 9002 },
-- { 10089, 10089 },
-- { 10091, 10091 },
-- { 10093, 10093 },
-- { 10095, 10095 },
-- { 10097, 10097 },
-- { 10099, 10099 },
-- { 10101, 10101 },
-- { 10182, 10182 },
-- { 10215, 10215 },
-- { 10217, 10217 },
-- { 10219, 10219 },
-- { 10221, 10221 },
-- { 10223, 10223 },
-- { 10628, 10628 },
-- { 10630, 10630 },
-- { 10632, 10632 },
-- { 10634, 10634 },
-- { 10636, 10636 },
-- { 10638, 10638 },
-- { 10640, 10640 },
-- { 10642, 10642 },
-- { 10644, 10644 },
-- { 10646, 10646 },
-- { 10648, 10648 },
-- { 10713, 10713 },
-- { 10715, 10715 },
-- { 10749, 10749 },
-- { 11811, 11811 },
-- { 11813, 11813 },
-- { 11815, 11815 },
-- { 11817, 11817 },
-- { 12297, 12297 },
-- { 12299, 12299 },
-- { 12301, 12301 },
-- { 12303, 12303 },
-- { 12305, 12305 },
-- { 12309, 12309 },
-- { 12311, 12311 },
-- { 12313, 12313 },
-- { 12315, 12315 },
-- { 12318, 12319 },
-- { 64831, 64831 },
-- { 65048, 65048 },
-- { 65078, 65078 },
-- { 65080, 65080 },
-- { 65082, 65082 },
-- { 65084, 65084 },
-- { 65086, 65086 },
-- { 65088, 65088 },
-- { 65090, 65090 },
-- { 65092, 65092 },
-- { 65096, 65096 },
-- { 65114, 65114 },
-- { 65116, 65116 },
-- { 65118, 65118 },
-- { 65289, 65289 },
-- { 65341, 65341 },
-- { 65373, 65373 },
-- { 65376, 65376 },
-- { 65379, 65379 },
--};
--static URange16 Pi_range16[] = {
-- { 171, 171 },
-- { 8216, 8216 },
-- { 8219, 8220 },
-- { 8223, 8223 },
-- { 8249, 8249 },
-- { 11778, 11778 },
-- { 11780, 11780 },
-- { 11785, 11785 },
-- { 11788, 11788 },
-- { 11804, 11804 },
-- { 11808, 11808 },
--};
--static URange16 Po_range16[] = {
-- { 33, 35 },
-- { 37, 39 },
-- { 42, 42 },
-- { 44, 44 },
-- { 46, 47 },
-- { 58, 59 },
-- { 63, 64 },
-- { 92, 92 },
-- { 161, 161 },
-- { 183, 183 },
-- { 191, 191 },
-- { 894, 894 },
-- { 903, 903 },
-- { 1370, 1375 },
-- { 1417, 1417 },
-- { 1472, 1472 },
-- { 1475, 1475 },
-- { 1478, 1478 },
-- { 1523, 1524 },
-- { 1545, 1546 },
-- { 1548, 1549 },
-- { 1563, 1563 },
-- { 1566, 1567 },
-- { 1642, 1645 },
-- { 1748, 1748 },
-- { 1792, 1805 },
-- { 2039, 2041 },
-- { 2096, 2110 },
-- { 2142, 2142 },
-- { 2404, 2405 },
-- { 2416, 2416 },
-- { 3572, 3572 },
-- { 3663, 3663 },
-- { 3674, 3675 },
-- { 3844, 3858 },
-- { 3973, 3973 },
-- { 4048, 4052 },
-- { 4057, 4058 },
-- { 4170, 4175 },
-- { 4347, 4347 },
-- { 4961, 4968 },
-- { 5741, 5742 },
-- { 5867, 5869 },
-- { 5941, 5942 },
-- { 6100, 6102 },
-- { 6104, 6106 },
-- { 6144, 6149 },
-- { 6151, 6154 },
-- { 6468, 6469 },
-- { 6686, 6687 },
-- { 6816, 6822 },
-- { 6824, 6829 },
-- { 7002, 7008 },
-- { 7164, 7167 },
-- { 7227, 7231 },
-- { 7294, 7295 },
-- { 7379, 7379 },
-- { 8214, 8215 },
-- { 8224, 8231 },
-- { 8240, 8248 },
-- { 8251, 8254 },
-- { 8257, 8259 },
-- { 8263, 8273 },
-- { 8275, 8275 },
-- { 8277, 8286 },
-- { 11513, 11516 },
-- { 11518, 11519 },
-- { 11632, 11632 },
-- { 11776, 11777 },
-- { 11782, 11784 },
-- { 11787, 11787 },
-- { 11790, 11798 },
-- { 11800, 11801 },
-- { 11803, 11803 },
-- { 11806, 11807 },
-- { 11818, 11822 },
-- { 11824, 11825 },
-- { 12289, 12291 },
-- { 12349, 12349 },
-- { 12539, 12539 },
-- { 42238, 42239 },
-- { 42509, 42511 },
-- { 42611, 42611 },
-- { 42622, 42622 },
-- { 42738, 42743 },
-- { 43124, 43127 },
-- { 43214, 43215 },
-- { 43256, 43258 },
-- { 43310, 43311 },
-- { 43359, 43359 },
-- { 43457, 43469 },
-- { 43486, 43487 },
-- { 43612, 43615 },
-- { 43742, 43743 },
-- { 44011, 44011 },
-- { 65040, 65046 },
-- { 65049, 65049 },
-- { 65072, 65072 },
-- { 65093, 65094 },
-- { 65097, 65100 },
-- { 65104, 65106 },
-- { 65108, 65111 },
-- { 65119, 65121 },
-- { 65128, 65128 },
-- { 65130, 65131 },
-- { 65281, 65283 },
-- { 65285, 65287 },
-- { 65290, 65290 },
-- { 65292, 65292 },
-- { 65294, 65295 },
-- { 65306, 65307 },
-- { 65311, 65312 },
-- { 65340, 65340 },
-- { 65377, 65377 },
-- { 65380, 65381 },
--};
--static URange32 Po_range32[] = {
-- { 65792, 65793 },
-- { 66463, 66463 },
-- { 66512, 66512 },
-- { 67671, 67671 },
-- { 67871, 67871 },
-- { 67903, 67903 },
-- { 68176, 68184 },
-- { 68223, 68223 },
-- { 68409, 68415 },
-- { 69703, 69709 },
-- { 69819, 69820 },
-- { 69822, 69825 },
-- { 74864, 74867 },
--};
--static URange16 Me_range16[] = {
-- { 1160, 1161 },
-- { 8413, 8416 },
-- { 8418, 8420 },
-- { 42608, 42610 },
--};
--static URange16 C_range16[] = {
-- { 0, 31 },
-- { 127, 159 },
-- { 173, 173 },
-- { 1536, 1539 },
-- { 1757, 1757 },
-- { 1807, 1807 },
-- { 6068, 6069 },
-- { 8203, 8207 },
-- { 8234, 8238 },
-- { 8288, 8292 },
-- { 8298, 8303 },
-- { 55296, 63743 },
-- { 65279, 65279 },
-- { 65529, 65531 },
--};
--static URange32 C_range32[] = {
-- { 69821, 69821 },
-- { 119155, 119162 },
-- { 917505, 917505 },
-- { 917536, 917631 },
-- { 983040, 1048573 },
-- { 1048576, 1114109 },
--};
--static URange16 Mc_range16[] = {
-- { 2307, 2307 },
-- { 2363, 2363 },
-- { 2366, 2368 },
-- { 2377, 2380 },
-- { 2382, 2383 },
-- { 2434, 2435 },
-- { 2494, 2496 },
-- { 2503, 2504 },
-- { 2507, 2508 },
-- { 2519, 2519 },
-- { 2563, 2563 },
-- { 2622, 2624 },
-- { 2691, 2691 },
-- { 2750, 2752 },
-- { 2761, 2761 },
-- { 2763, 2764 },
-- { 2818, 2819 },
-- { 2878, 2878 },
-- { 2880, 2880 },
-- { 2887, 2888 },
-- { 2891, 2892 },
-- { 2903, 2903 },
-- { 3006, 3007 },
-- { 3009, 3010 },
-- { 3014, 3016 },
-- { 3018, 3020 },
-- { 3031, 3031 },
-- { 3073, 3075 },
-- { 3137, 3140 },
-- { 3202, 3203 },
-- { 3262, 3262 },
-- { 3264, 3268 },
-- { 3271, 3272 },
-- { 3274, 3275 },
-- { 3285, 3286 },
-- { 3330, 3331 },
-- { 3390, 3392 },
-- { 3398, 3400 },
-- { 3402, 3404 },
-- { 3415, 3415 },
-- { 3458, 3459 },
-- { 3535, 3537 },
-- { 3544, 3551 },
-- { 3570, 3571 },
-- { 3902, 3903 },
-- { 3967, 3967 },
-- { 4139, 4140 },
-- { 4145, 4145 },
-- { 4152, 4152 },
-- { 4155, 4156 },
-- { 4182, 4183 },
-- { 4194, 4196 },
-- { 4199, 4205 },
-- { 4227, 4228 },
-- { 4231, 4236 },
-- { 4239, 4239 },
-- { 4250, 4252 },
-- { 6070, 6070 },
-- { 6078, 6085 },
-- { 6087, 6088 },
-- { 6435, 6438 },
-- { 6441, 6443 },
-- { 6448, 6449 },
-- { 6451, 6456 },
-- { 6576, 6592 },
-- { 6600, 6601 },
-- { 6681, 6683 },
-- { 6741, 6741 },
-- { 6743, 6743 },
-- { 6753, 6753 },
-- { 6755, 6756 },
-- { 6765, 6770 },
-- { 6916, 6916 },
-- { 6965, 6965 },
-- { 6971, 6971 },
-- { 6973, 6977 },
-- { 6979, 6980 },
-- { 7042, 7042 },
-- { 7073, 7073 },
-- { 7078, 7079 },
-- { 7082, 7082 },
-- { 7143, 7143 },
-- { 7146, 7148 },
-- { 7150, 7150 },
-- { 7154, 7155 },
-- { 7204, 7211 },
-- { 7220, 7221 },
-- { 7393, 7393 },
-- { 7410, 7410 },
-- { 43043, 43044 },
-- { 43047, 43047 },
-- { 43136, 43137 },
-- { 43188, 43203 },
-- { 43346, 43347 },
-- { 43395, 43395 },
-- { 43444, 43445 },
-- { 43450, 43451 },
-- { 43453, 43456 },
-- { 43567, 43568 },
-- { 43571, 43572 },
-- { 43597, 43597 },
-- { 43643, 43643 },
-- { 44003, 44004 },
-- { 44006, 44007 },
-- { 44009, 44010 },
-- { 44012, 44012 },
--};
--static URange32 Mc_range32[] = {
-- { 69632, 69632 },
-- { 69634, 69634 },
-- { 69762, 69762 },
-- { 69808, 69810 },
-- { 69815, 69816 },
-- { 119141, 119142 },
-- { 119149, 119154 },
--};
--static URange16 Mn_range16[] = {
-- { 768, 879 },
-- { 1155, 1159 },
-- { 1425, 1469 },
-- { 1471, 1471 },
-- { 1473, 1474 },
-- { 1476, 1477 },
-- { 1479, 1479 },
-- { 1552, 1562 },
-- { 1611, 1631 },
-- { 1648, 1648 },
-- { 1750, 1756 },
-- { 1759, 1764 },
-- { 1767, 1768 },
-- { 1770, 1773 },
-- { 1809, 1809 },
-- { 1840, 1866 },
-- { 1958, 1968 },
-- { 2027, 2035 },
-- { 2070, 2073 },
-- { 2075, 2083 },
-- { 2085, 2087 },
-- { 2089, 2093 },
-- { 2137, 2139 },
-- { 2304, 2306 },
-- { 2362, 2362 },
-- { 2364, 2364 },
-- { 2369, 2376 },
-- { 2381, 2381 },
-- { 2385, 2391 },
-- { 2402, 2403 },
-- { 2433, 2433 },
-- { 2492, 2492 },
-- { 2497, 2500 },
-- { 2509, 2509 },
-- { 2530, 2531 },
-- { 2561, 2562 },
-- { 2620, 2620 },
-- { 2625, 2626 },
-- { 2631, 2632 },
-- { 2635, 2637 },
-- { 2641, 2641 },
-- { 2672, 2673 },
-- { 2677, 2677 },
-- { 2689, 2690 },
-- { 2748, 2748 },
-- { 2753, 2757 },
-- { 2759, 2760 },
-- { 2765, 2765 },
-- { 2786, 2787 },
-- { 2817, 2817 },
-- { 2876, 2876 },
-- { 2879, 2879 },
-- { 2881, 2884 },
-- { 2893, 2893 },
-- { 2902, 2902 },
-- { 2914, 2915 },
-- { 2946, 2946 },
-- { 3008, 3008 },
-- { 3021, 3021 },
-- { 3134, 3136 },
-- { 3142, 3144 },
-- { 3146, 3149 },
-- { 3157, 3158 },
-- { 3170, 3171 },
-- { 3260, 3260 },
-- { 3263, 3263 },
-- { 3270, 3270 },
-- { 3276, 3277 },
-- { 3298, 3299 },
-- { 3393, 3396 },
-- { 3405, 3405 },
-- { 3426, 3427 },
-- { 3530, 3530 },
-- { 3538, 3540 },
-- { 3542, 3542 },
-- { 3633, 3633 },
-- { 3636, 3642 },
-- { 3655, 3662 },
-- { 3761, 3761 },
-- { 3764, 3769 },
-- { 3771, 3772 },
-- { 3784, 3789 },
-- { 3864, 3865 },
-- { 3893, 3893 },
-- { 3895, 3895 },
-- { 3897, 3897 },
-- { 3953, 3966 },
-- { 3968, 3972 },
-- { 3974, 3975 },
-- { 3981, 3991 },
-- { 3993, 4028 },
-- { 4038, 4038 },
-- { 4141, 4144 },
-- { 4146, 4151 },
-- { 4153, 4154 },
-- { 4157, 4158 },
-- { 4184, 4185 },
-- { 4190, 4192 },
-- { 4209, 4212 },
-- { 4226, 4226 },
-- { 4229, 4230 },
-- { 4237, 4237 },
-- { 4253, 4253 },
-- { 4957, 4959 },
-- { 5906, 5908 },
-- { 5938, 5940 },
-- { 5970, 5971 },
-- { 6002, 6003 },
-- { 6071, 6077 },
-- { 6086, 6086 },
-- { 6089, 6099 },
-- { 6109, 6109 },
-- { 6155, 6157 },
-- { 6313, 6313 },
-- { 6432, 6434 },
-- { 6439, 6440 },
-- { 6450, 6450 },
-- { 6457, 6459 },
-- { 6679, 6680 },
-- { 6742, 6742 },
-- { 6744, 6750 },
-- { 6752, 6752 },
-- { 6754, 6754 },
-- { 6757, 6764 },
-- { 6771, 6780 },
-- { 6783, 6783 },
-- { 6912, 6915 },
-- { 6964, 6964 },
-- { 6966, 6970 },
-- { 6972, 6972 },
-- { 6978, 6978 },
-- { 7019, 7027 },
-- { 7040, 7041 },
-- { 7074, 7077 },
-- { 7080, 7081 },
-- { 7142, 7142 },
-- { 7144, 7145 },
-- { 7149, 7149 },
-- { 7151, 7153 },
-- { 7212, 7219 },
-- { 7222, 7223 },
-- { 7376, 7378 },
-- { 7380, 7392 },
-- { 7394, 7400 },
-- { 7405, 7405 },
-- { 7616, 7654 },
-- { 7676, 7679 },
-- { 8400, 8412 },
-- { 8417, 8417 },
-- { 8421, 8432 },
-- { 11503, 11505 },
-- { 11647, 11647 },
-- { 11744, 11775 },
-- { 12330, 12335 },
-- { 12441, 12442 },
-- { 42607, 42607 },
-- { 42620, 42621 },
-- { 42736, 42737 },
-- { 43010, 43010 },
-- { 43014, 43014 },
-- { 43019, 43019 },
-- { 43045, 43046 },
-- { 43204, 43204 },
-- { 43232, 43249 },
-- { 43302, 43309 },
-- { 43335, 43345 },
-- { 43392, 43394 },
-- { 43443, 43443 },
-- { 43446, 43449 },
-- { 43452, 43452 },
-- { 43561, 43566 },
-- { 43569, 43570 },
-- { 43573, 43574 },
-- { 43587, 43587 },
-- { 43596, 43596 },
-- { 43696, 43696 },
-- { 43698, 43700 },
-- { 43703, 43704 },
-- { 43710, 43711 },
-- { 43713, 43713 },
-- { 44005, 44005 },
-- { 44008, 44008 },
-- { 44013, 44013 },
-- { 64286, 64286 },
-- { 65024, 65039 },
-- { 65056, 65062 },
--};
--static URange32 Mn_range32[] = {
-- { 66045, 66045 },
-- { 68097, 68099 },
-- { 68101, 68102 },
-- { 68108, 68111 },
-- { 68152, 68154 },
-- { 68159, 68159 },
-- { 69633, 69633 },
-- { 69688, 69702 },
-- { 69760, 69761 },
-- { 69811, 69814 },
-- { 69817, 69818 },
-- { 119143, 119145 },
-- { 119163, 119170 },
-- { 119173, 119179 },
-- { 119210, 119213 },
-- { 119362, 119364 },
-- { 917760, 917999 },
--};
--static URange16 M_range16[] = {
-- { 768, 879 },
-- { 1155, 1161 },
-- { 1425, 1469 },
-- { 1471, 1471 },
-- { 1473, 1474 },
-- { 1476, 1477 },
-- { 1479, 1479 },
-- { 1552, 1562 },
-- { 1611, 1631 },
-- { 1648, 1648 },
-- { 1750, 1756 },
-- { 1759, 1764 },
-- { 1767, 1768 },
-- { 1770, 1773 },
-- { 1809, 1809 },
-- { 1840, 1866 },
-- { 1958, 1968 },
-- { 2027, 2035 },
-- { 2070, 2073 },
-- { 2075, 2083 },
-- { 2085, 2087 },
-- { 2089, 2093 },
-- { 2137, 2139 },
-- { 2304, 2307 },
-- { 2362, 2364 },
-- { 2366, 2383 },
-- { 2385, 2391 },
-- { 2402, 2403 },
-- { 2433, 2435 },
-- { 2492, 2492 },
-- { 2494, 2500 },
-- { 2503, 2504 },
-- { 2507, 2509 },
-- { 2519, 2519 },
-- { 2530, 2531 },
-- { 2561, 2563 },
-- { 2620, 2620 },
-- { 2622, 2626 },
-- { 2631, 2632 },
-- { 2635, 2637 },
-- { 2641, 2641 },
-- { 2672, 2673 },
-- { 2677, 2677 },
-- { 2689, 2691 },
-- { 2748, 2748 },
-- { 2750, 2757 },
-- { 2759, 2761 },
-- { 2763, 2765 },
-- { 2786, 2787 },
-- { 2817, 2819 },
-- { 2876, 2876 },
-- { 2878, 2884 },
-- { 2887, 2888 },
-- { 2891, 2893 },
-- { 2902, 2903 },
-- { 2914, 2915 },
-- { 2946, 2946 },
-- { 3006, 3010 },
-- { 3014, 3016 },
-- { 3018, 3021 },
-- { 3031, 3031 },
-- { 3073, 3075 },
-- { 3134, 3140 },
-- { 3142, 3144 },
-- { 3146, 3149 },
-- { 3157, 3158 },
-- { 3170, 3171 },
-- { 3202, 3203 },
-- { 3260, 3260 },
-- { 3262, 3268 },
-- { 3270, 3272 },
-- { 3274, 3277 },
-- { 3285, 3286 },
-- { 3298, 3299 },
-- { 3330, 3331 },
-- { 3390, 3396 },
-- { 3398, 3400 },
-- { 3402, 3405 },
-- { 3415, 3415 },
-- { 3426, 3427 },
-- { 3458, 3459 },
-- { 3530, 3530 },
-- { 3535, 3540 },
-- { 3542, 3542 },
-- { 3544, 3551 },
-- { 3570, 3571 },
-- { 3633, 3633 },
-- { 3636, 3642 },
-- { 3655, 3662 },
-- { 3761, 3761 },
-- { 3764, 3769 },
-- { 3771, 3772 },
-- { 3784, 3789 },
-- { 3864, 3865 },
-- { 3893, 3893 },
-- { 3895, 3895 },
-- { 3897, 3897 },
-- { 3902, 3903 },
-- { 3953, 3972 },
-- { 3974, 3975 },
-- { 3981, 3991 },
-- { 3993, 4028 },
-- { 4038, 4038 },
-- { 4139, 4158 },
-- { 4182, 4185 },
-- { 4190, 4192 },
-- { 4194, 4196 },
-- { 4199, 4205 },
-- { 4209, 4212 },
-- { 4226, 4237 },
-- { 4239, 4239 },
-- { 4250, 4253 },
-- { 4957, 4959 },
-- { 5906, 5908 },
-- { 5938, 5940 },
-- { 5970, 5971 },
-- { 6002, 6003 },
-- { 6070, 6099 },
-- { 6109, 6109 },
-- { 6155, 6157 },
-- { 6313, 6313 },
-- { 6432, 6443 },
-- { 6448, 6459 },
-- { 6576, 6592 },
-- { 6600, 6601 },
-- { 6679, 6683 },
-- { 6741, 6750 },
-- { 6752, 6780 },
-- { 6783, 6783 },
-- { 6912, 6916 },
-- { 6964, 6980 },
-- { 7019, 7027 },
-- { 7040, 7042 },
-- { 7073, 7082 },
-- { 7142, 7155 },
-- { 7204, 7223 },
-- { 7376, 7378 },
-- { 7380, 7400 },
-- { 7405, 7405 },
-- { 7410, 7410 },
-- { 7616, 7654 },
-- { 7676, 7679 },
-- { 8400, 8432 },
-- { 11503, 11505 },
-- { 11647, 11647 },
-- { 11744, 11775 },
-- { 12330, 12335 },
-- { 12441, 12442 },
-- { 42607, 42610 },
-- { 42620, 42621 },
-- { 42736, 42737 },
-- { 43010, 43010 },
-- { 43014, 43014 },
-- { 43019, 43019 },
-- { 43043, 43047 },
-- { 43136, 43137 },
-- { 43188, 43204 },
-- { 43232, 43249 },
-- { 43302, 43309 },
-- { 43335, 43347 },
-- { 43392, 43395 },
-- { 43443, 43456 },
-- { 43561, 43574 },
-- { 43587, 43587 },
-- { 43596, 43597 },
-- { 43643, 43643 },
-- { 43696, 43696 },
-- { 43698, 43700 },
-- { 43703, 43704 },
-- { 43710, 43711 },
-- { 43713, 43713 },
-- { 44003, 44010 },
-- { 44012, 44013 },
-- { 64286, 64286 },
-- { 65024, 65039 },
-- { 65056, 65062 },
--};
--static URange32 M_range32[] = {
-- { 66045, 66045 },
-- { 68097, 68099 },
-- { 68101, 68102 },
-- { 68108, 68111 },
-- { 68152, 68154 },
-- { 68159, 68159 },
-- { 69632, 69634 },
-- { 69688, 69702 },
-- { 69760, 69762 },
-- { 69808, 69818 },
-- { 119141, 119145 },
-- { 119149, 119154 },
-- { 119163, 119170 },
-- { 119173, 119179 },
-- { 119210, 119213 },
-- { 119362, 119364 },
-- { 917760, 917999 },
--};
--static URange16 L_range16[] = {
-- { 65, 90 },
-- { 97, 122 },
-- { 170, 170 },
-- { 181, 181 },
-- { 186, 186 },
-- { 192, 214 },
-- { 216, 246 },
-- { 248, 705 },
-- { 710, 721 },
-- { 736, 740 },
-- { 748, 748 },
-- { 750, 750 },
-- { 880, 884 },
-- { 886, 887 },
-- { 890, 893 },
-- { 902, 902 },
-- { 904, 906 },
-- { 908, 908 },
-- { 910, 929 },
-- { 931, 1013 },
-- { 1015, 1153 },
-- { 1162, 1319 },
-- { 1329, 1366 },
-- { 1369, 1369 },
-- { 1377, 1415 },
-- { 1488, 1514 },
-- { 1520, 1522 },
-- { 1568, 1610 },
-- { 1646, 1647 },
-- { 1649, 1747 },
-- { 1749, 1749 },
-- { 1765, 1766 },
-- { 1774, 1775 },
-- { 1786, 1788 },
-- { 1791, 1791 },
-- { 1808, 1808 },
-- { 1810, 1839 },
-- { 1869, 1957 },
-- { 1969, 1969 },
-- { 1994, 2026 },
-- { 2036, 2037 },
-- { 2042, 2042 },
-- { 2048, 2069 },
-- { 2074, 2074 },
-- { 2084, 2084 },
-- { 2088, 2088 },
-- { 2112, 2136 },
-- { 2308, 2361 },
-- { 2365, 2365 },
-- { 2384, 2384 },
-- { 2392, 2401 },
-- { 2417, 2423 },
-- { 2425, 2431 },
-- { 2437, 2444 },
-- { 2447, 2448 },
-- { 2451, 2472 },
-- { 2474, 2480 },
-- { 2482, 2482 },
-- { 2486, 2489 },
-- { 2493, 2493 },
-- { 2510, 2510 },
-- { 2524, 2525 },
-- { 2527, 2529 },
-- { 2544, 2545 },
-- { 2565, 2570 },
-- { 2575, 2576 },
-- { 2579, 2600 },
-- { 2602, 2608 },
-- { 2610, 2611 },
-- { 2613, 2614 },
-- { 2616, 2617 },
-- { 2649, 2652 },
-- { 2654, 2654 },
-- { 2674, 2676 },
-- { 2693, 2701 },
-- { 2703, 2705 },
-- { 2707, 2728 },
-- { 2730, 2736 },
-- { 2738, 2739 },
-- { 2741, 2745 },
-- { 2749, 2749 },
-- { 2768, 2768 },
-- { 2784, 2785 },
-- { 2821, 2828 },
-- { 2831, 2832 },
-- { 2835, 2856 },
-- { 2858, 2864 },
-- { 2866, 2867 },
-- { 2869, 2873 },
-- { 2877, 2877 },
-- { 2908, 2909 },
-- { 2911, 2913 },
-- { 2929, 2929 },
-- { 2947, 2947 },
-- { 2949, 2954 },
-- { 2958, 2960 },
-- { 2962, 2965 },
-- { 2969, 2970 },
-- { 2972, 2972 },
-- { 2974, 2975 },
-- { 2979, 2980 },
-- { 2984, 2986 },
-- { 2990, 3001 },
-- { 3024, 3024 },
-- { 3077, 3084 },
-- { 3086, 3088 },
-- { 3090, 3112 },
-- { 3114, 3123 },
-- { 3125, 3129 },
-- { 3133, 3133 },
-- { 3160, 3161 },
-- { 3168, 3169 },
-- { 3205, 3212 },
-- { 3214, 3216 },
-- { 3218, 3240 },
-- { 3242, 3251 },
-- { 3253, 3257 },
-- { 3261, 3261 },
-- { 3294, 3294 },
-- { 3296, 3297 },
-- { 3313, 3314 },
-- { 3333, 3340 },
-- { 3342, 3344 },
-- { 3346, 3386 },
-- { 3389, 3389 },
-- { 3406, 3406 },
-- { 3424, 3425 },
-- { 3450, 3455 },
-- { 3461, 3478 },
-- { 3482, 3505 },
-- { 3507, 3515 },
-- { 3517, 3517 },
-- { 3520, 3526 },
-- { 3585, 3632 },
-- { 3634, 3635 },
-- { 3648, 3654 },
-- { 3713, 3714 },
-- { 3716, 3716 },
-- { 3719, 3720 },
-- { 3722, 3722 },
-- { 3725, 3725 },
-- { 3732, 3735 },
-- { 3737, 3743 },
-- { 3745, 3747 },
-- { 3749, 3749 },
-- { 3751, 3751 },
-- { 3754, 3755 },
-- { 3757, 3760 },
-- { 3762, 3763 },
-- { 3773, 3773 },
-- { 3776, 3780 },
-- { 3782, 3782 },
-- { 3804, 3805 },
-- { 3840, 3840 },
-- { 3904, 3911 },
-- { 3913, 3948 },
-- { 3976, 3980 },
-- { 4096, 4138 },
-- { 4159, 4159 },
-- { 4176, 4181 },
-- { 4186, 4189 },
-- { 4193, 4193 },
-- { 4197, 4198 },
-- { 4206, 4208 },
-- { 4213, 4225 },
-- { 4238, 4238 },
-- { 4256, 4293 },
-- { 4304, 4346 },
-- { 4348, 4348 },
-- { 4352, 4680 },
-- { 4682, 4685 },
-- { 4688, 4694 },
-- { 4696, 4696 },
-- { 4698, 4701 },
-- { 4704, 4744 },
-- { 4746, 4749 },
-- { 4752, 4784 },
-- { 4786, 4789 },
-- { 4792, 4798 },
-- { 4800, 4800 },
-- { 4802, 4805 },
-- { 4808, 4822 },
-- { 4824, 4880 },
-- { 4882, 4885 },
-- { 4888, 4954 },
-- { 4992, 5007 },
-- { 5024, 5108 },
-- { 5121, 5740 },
-- { 5743, 5759 },
-- { 5761, 5786 },
-- { 5792, 5866 },
-- { 5888, 5900 },
-- { 5902, 5905 },
-- { 5920, 5937 },
-- { 5952, 5969 },
-- { 5984, 5996 },
-- { 5998, 6000 },
-- { 6016, 6067 },
-- { 6103, 6103 },
-- { 6108, 6108 },
-- { 6176, 6263 },
-- { 6272, 6312 },
-- { 6314, 6314 },
-- { 6320, 6389 },
-- { 6400, 6428 },
-- { 6480, 6509 },
-- { 6512, 6516 },
-- { 6528, 6571 },
-- { 6593, 6599 },
-- { 6656, 6678 },
-- { 6688, 6740 },
-- { 6823, 6823 },
-- { 6917, 6963 },
-- { 6981, 6987 },
-- { 7043, 7072 },
-- { 7086, 7087 },
-- { 7104, 7141 },
-- { 7168, 7203 },
-- { 7245, 7247 },
-- { 7258, 7293 },
-- { 7401, 7404 },
-- { 7406, 7409 },
-- { 7424, 7615 },
-- { 7680, 7957 },
-- { 7960, 7965 },
-- { 7968, 8005 },
-- { 8008, 8013 },
-- { 8016, 8023 },
-- { 8025, 8025 },
-- { 8027, 8027 },
-- { 8029, 8029 },
-- { 8031, 8061 },
-- { 8064, 8116 },
-- { 8118, 8124 },
-- { 8126, 8126 },
-- { 8130, 8132 },
-- { 8134, 8140 },
-- { 8144, 8147 },
-- { 8150, 8155 },
-- { 8160, 8172 },
-- { 8178, 8180 },
-- { 8182, 8188 },
-- { 8305, 8305 },
-- { 8319, 8319 },
-- { 8336, 8348 },
-- { 8450, 8450 },
-- { 8455, 8455 },
-- { 8458, 8467 },
-- { 8469, 8469 },
-- { 8473, 8477 },
-- { 8484, 8484 },
-- { 8486, 8486 },
-- { 8488, 8488 },
-- { 8490, 8493 },
-- { 8495, 8505 },
-- { 8508, 8511 },
-- { 8517, 8521 },
-- { 8526, 8526 },
-- { 8579, 8580 },
-- { 11264, 11310 },
-- { 11312, 11358 },
-- { 11360, 11492 },
-- { 11499, 11502 },
-- { 11520, 11557 },
-- { 11568, 11621 },
-- { 11631, 11631 },
-- { 11648, 11670 },
-- { 11680, 11686 },
-- { 11688, 11694 },
-- { 11696, 11702 },
-- { 11704, 11710 },
-- { 11712, 11718 },
-- { 11720, 11726 },
-- { 11728, 11734 },
-- { 11736, 11742 },
-- { 11823, 11823 },
-- { 12293, 12294 },
-- { 12337, 12341 },
-- { 12347, 12348 },
-- { 12353, 12438 },
-- { 12445, 12447 },
-- { 12449, 12538 },
-- { 12540, 12543 },
-- { 12549, 12589 },
-- { 12593, 12686 },
-- { 12704, 12730 },
-- { 12784, 12799 },
-- { 13312, 19893 },
-- { 19968, 40907 },
-- { 40960, 42124 },
-- { 42192, 42237 },
-- { 42240, 42508 },
-- { 42512, 42527 },
-- { 42538, 42539 },
-- { 42560, 42606 },
-- { 42623, 42647 },
-- { 42656, 42725 },
-- { 42775, 42783 },
-- { 42786, 42888 },
-- { 42891, 42894 },
-- { 42896, 42897 },
-- { 42912, 42921 },
-- { 43002, 43009 },
-- { 43011, 43013 },
-- { 43015, 43018 },
-- { 43020, 43042 },
-- { 43072, 43123 },
-- { 43138, 43187 },
-- { 43250, 43255 },
-- { 43259, 43259 },
-- { 43274, 43301 },
-- { 43312, 43334 },
-- { 43360, 43388 },
-- { 43396, 43442 },
-- { 43471, 43471 },
-- { 43520, 43560 },
-- { 43584, 43586 },
-- { 43588, 43595 },
-- { 43616, 43638 },
-- { 43642, 43642 },
-- { 43648, 43695 },
-- { 43697, 43697 },
-- { 43701, 43702 },
-- { 43705, 43709 },
-- { 43712, 43712 },
-- { 43714, 43714 },
-- { 43739, 43741 },
-- { 43777, 43782 },
-- { 43785, 43790 },
-- { 43793, 43798 },
-- { 43808, 43814 },
-- { 43816, 43822 },
-- { 43968, 44002 },
-- { 44032, 55203 },
-- { 55216, 55238 },
-- { 55243, 55291 },
-- { 63744, 64045 },
-- { 64048, 64109 },
-- { 64112, 64217 },
-- { 64256, 64262 },
-- { 64275, 64279 },
-- { 64285, 64285 },
-- { 64287, 64296 },
-- { 64298, 64310 },
-- { 64312, 64316 },
-- { 64318, 64318 },
-- { 64320, 64321 },
-- { 64323, 64324 },
-- { 64326, 64433 },
-- { 64467, 64829 },
-- { 64848, 64911 },
-- { 64914, 64967 },
-- { 65008, 65019 },
-- { 65136, 65140 },
-- { 65142, 65276 },
-- { 65313, 65338 },
-- { 65345, 65370 },
-- { 65382, 65470 },
-- { 65474, 65479 },
-- { 65482, 65487 },
-- { 65490, 65495 },
-- { 65498, 65500 },
--};
--static URange32 L_range32[] = {
-- { 65536, 65547 },
-- { 65549, 65574 },
-- { 65576, 65594 },
-- { 65596, 65597 },
-- { 65599, 65613 },
-- { 65616, 65629 },
-- { 65664, 65786 },
-- { 66176, 66204 },
-- { 66208, 66256 },
-- { 66304, 66334 },
-- { 66352, 66368 },
-- { 66370, 66377 },
-- { 66432, 66461 },
-- { 66464, 66499 },
-- { 66504, 66511 },
-- { 66560, 66717 },
-- { 67584, 67589 },
-- { 67592, 67592 },
-- { 67594, 67637 },
-- { 67639, 67640 },
-- { 67644, 67644 },
-- { 67647, 67669 },
-- { 67840, 67861 },
-- { 67872, 67897 },
-- { 68096, 68096 },
-- { 68112, 68115 },
-- { 68117, 68119 },
-- { 68121, 68147 },
-- { 68192, 68220 },
-- { 68352, 68405 },
-- { 68416, 68437 },
-- { 68448, 68466 },
-- { 68608, 68680 },
-- { 69635, 69687 },
-- { 69763, 69807 },
-- { 73728, 74606 },
-- { 77824, 78894 },
-- { 92160, 92728 },
-- { 110592, 110593 },
-- { 119808, 119892 },
-- { 119894, 119964 },
-- { 119966, 119967 },
-- { 119970, 119970 },
-- { 119973, 119974 },
-- { 119977, 119980 },
-- { 119982, 119993 },
-- { 119995, 119995 },
-- { 119997, 120003 },
-- { 120005, 120069 },
-- { 120071, 120074 },
-- { 120077, 120084 },
-- { 120086, 120092 },
-- { 120094, 120121 },
-- { 120123, 120126 },
-- { 120128, 120132 },
-- { 120134, 120134 },
-- { 120138, 120144 },
-- { 120146, 120485 },
-- { 120488, 120512 },
-- { 120514, 120538 },
-- { 120540, 120570 },
-- { 120572, 120596 },
-- { 120598, 120628 },
-- { 120630, 120654 },
-- { 120656, 120686 },
-- { 120688, 120712 },
-- { 120714, 120744 },
-- { 120746, 120770 },
-- { 120772, 120779 },
-- { 131072, 173782 },
-- { 173824, 177972 },
-- { 177984, 178205 },
-- { 194560, 195101 },
--};
--static URange16 N_range16[] = {
-- { 48, 57 },
-- { 178, 179 },
-- { 185, 185 },
-- { 188, 190 },
-- { 1632, 1641 },
-- { 1776, 1785 },
-- { 1984, 1993 },
-- { 2406, 2415 },
-- { 2534, 2543 },
-- { 2548, 2553 },
-- { 2662, 2671 },
-- { 2790, 2799 },
-- { 2918, 2927 },
-- { 2930, 2935 },
-- { 3046, 3058 },
-- { 3174, 3183 },
-- { 3192, 3198 },
-- { 3302, 3311 },
-- { 3430, 3445 },
-- { 3664, 3673 },
-- { 3792, 3801 },
-- { 3872, 3891 },
-- { 4160, 4169 },
-- { 4240, 4249 },
-- { 4969, 4988 },
-- { 5870, 5872 },
-- { 6112, 6121 },
-- { 6128, 6137 },
-- { 6160, 6169 },
-- { 6470, 6479 },
-- { 6608, 6618 },
-- { 6784, 6793 },
-- { 6800, 6809 },
-- { 6992, 7001 },
-- { 7088, 7097 },
-- { 7232, 7241 },
-- { 7248, 7257 },
-- { 8304, 8304 },
-- { 8308, 8313 },
-- { 8320, 8329 },
-- { 8528, 8578 },
-- { 8581, 8585 },
-- { 9312, 9371 },
-- { 9450, 9471 },
-- { 10102, 10131 },
-- { 11517, 11517 },
-- { 12295, 12295 },
-- { 12321, 12329 },
-- { 12344, 12346 },
-- { 12690, 12693 },
-- { 12832, 12841 },
-- { 12881, 12895 },
-- { 12928, 12937 },
-- { 12977, 12991 },
-- { 42528, 42537 },
-- { 42726, 42735 },
-- { 43056, 43061 },
-- { 43216, 43225 },
-- { 43264, 43273 },
-- { 43472, 43481 },
-- { 43600, 43609 },
-- { 44016, 44025 },
-- { 65296, 65305 },
--};
--static URange32 N_range32[] = {
-- { 65799, 65843 },
-- { 65856, 65912 },
-- { 65930, 65930 },
-- { 66336, 66339 },
-- { 66369, 66369 },
-- { 66378, 66378 },
-- { 66513, 66517 },
-- { 66720, 66729 },
-- { 67672, 67679 },
-- { 67862, 67867 },
-- { 68160, 68167 },
-- { 68221, 68222 },
-- { 68440, 68447 },
-- { 68472, 68479 },
-- { 69216, 69246 },
-- { 69714, 69743 },
-- { 74752, 74850 },
-- { 119648, 119665 },
-- { 120782, 120831 },
-- { 127232, 127242 },
--};
--static URange16 Sk_range16[] = {
-- { 94, 94 },
-- { 96, 96 },
-- { 168, 168 },
-- { 175, 175 },
-- { 180, 180 },
-- { 184, 184 },
-- { 706, 709 },
-- { 722, 735 },
-- { 741, 747 },
-- { 749, 749 },
-- { 751, 767 },
-- { 885, 885 },
-- { 900, 901 },
-- { 8125, 8125 },
-- { 8127, 8129 },
-- { 8141, 8143 },
-- { 8157, 8159 },
-- { 8173, 8175 },
-- { 8189, 8190 },
-- { 12443, 12444 },
-- { 42752, 42774 },
-- { 42784, 42785 },
-- { 42889, 42890 },
-- { 64434, 64449 },
-- { 65342, 65342 },
-- { 65344, 65344 },
-- { 65507, 65507 },
--};
--static URange16 P_range16[] = {
-- { 33, 35 },
-- { 37, 42 },
-- { 44, 47 },
-- { 58, 59 },
-- { 63, 64 },
-- { 91, 93 },
-- { 95, 95 },
-- { 123, 123 },
-- { 125, 125 },
-- { 161, 161 },
-- { 171, 171 },
-- { 183, 183 },
-- { 187, 187 },
-- { 191, 191 },
-- { 894, 894 },
-- { 903, 903 },
-- { 1370, 1375 },
-- { 1417, 1418 },
-- { 1470, 1470 },
-- { 1472, 1472 },
-- { 1475, 1475 },
-- { 1478, 1478 },
-- { 1523, 1524 },
-- { 1545, 1546 },
-- { 1548, 1549 },
-- { 1563, 1563 },
-- { 1566, 1567 },
-- { 1642, 1645 },
-- { 1748, 1748 },
-- { 1792, 1805 },
-- { 2039, 2041 },
-- { 2096, 2110 },
-- { 2142, 2142 },
-- { 2404, 2405 },
-- { 2416, 2416 },
-- { 3572, 3572 },
-- { 3663, 3663 },
-- { 3674, 3675 },
-- { 3844, 3858 },
-- { 3898, 3901 },
-- { 3973, 3973 },
-- { 4048, 4052 },
-- { 4057, 4058 },
-- { 4170, 4175 },
-- { 4347, 4347 },
-- { 4961, 4968 },
-- { 5120, 5120 },
-- { 5741, 5742 },
-- { 5787, 5788 },
-- { 5867, 5869 },
-- { 5941, 5942 },
-- { 6100, 6102 },
-- { 6104, 6106 },
-- { 6144, 6154 },
-- { 6468, 6469 },
-- { 6686, 6687 },
-- { 6816, 6822 },
-- { 6824, 6829 },
-- { 7002, 7008 },
-- { 7164, 7167 },
-- { 7227, 7231 },
-- { 7294, 7295 },
-- { 7379, 7379 },
-- { 8208, 8231 },
-- { 8240, 8259 },
-- { 8261, 8273 },
-- { 8275, 8286 },
-- { 8317, 8318 },
-- { 8333, 8334 },
-- { 9001, 9002 },
-- { 10088, 10101 },
-- { 10181, 10182 },
-- { 10214, 10223 },
-- { 10627, 10648 },
-- { 10712, 10715 },
-- { 10748, 10749 },
-- { 11513, 11516 },
-- { 11518, 11519 },
-- { 11632, 11632 },
-- { 11776, 11822 },
-- { 11824, 11825 },
-- { 12289, 12291 },
-- { 12296, 12305 },
-- { 12308, 12319 },
-- { 12336, 12336 },
-- { 12349, 12349 },
-- { 12448, 12448 },
-- { 12539, 12539 },
-- { 42238, 42239 },
-- { 42509, 42511 },
-- { 42611, 42611 },
-- { 42622, 42622 },
-- { 42738, 42743 },
-- { 43124, 43127 },
-- { 43214, 43215 },
-- { 43256, 43258 },
-- { 43310, 43311 },
-- { 43359, 43359 },
-- { 43457, 43469 },
-- { 43486, 43487 },
-- { 43612, 43615 },
-- { 43742, 43743 },
-- { 44011, 44011 },
-- { 64830, 64831 },
-- { 65040, 65049 },
-- { 65072, 65106 },
-- { 65108, 65121 },
-- { 65123, 65123 },
-- { 65128, 65128 },
-- { 65130, 65131 },
-- { 65281, 65283 },
-- { 65285, 65290 },
-- { 65292, 65295 },
-- { 65306, 65307 },
-- { 65311, 65312 },
-- { 65339, 65341 },
-- { 65343, 65343 },
-- { 65371, 65371 },
-- { 65373, 65373 },
-- { 65375, 65381 },
--};
--static URange32 P_range32[] = {
-- { 65792, 65793 },
-- { 66463, 66463 },
-- { 66512, 66512 },
-- { 67671, 67671 },
-- { 67871, 67871 },
-- { 67903, 67903 },
-- { 68176, 68184 },
-- { 68223, 68223 },
-- { 68409, 68415 },
-- { 69703, 69709 },
-- { 69819, 69820 },
-- { 69822, 69825 },
-- { 74864, 74867 },
--};
--static URange16 S_range16[] = {
-- { 36, 36 },
-- { 43, 43 },
-- { 60, 62 },
-- { 94, 94 },
-- { 96, 96 },
-- { 124, 124 },
-- { 126, 126 },
-- { 162, 169 },
-- { 172, 172 },
-- { 174, 177 },
-- { 180, 180 },
-- { 182, 182 },
-- { 184, 184 },
-- { 215, 215 },
-- { 247, 247 },
-- { 706, 709 },
-- { 722, 735 },
-- { 741, 747 },
-- { 749, 749 },
-- { 751, 767 },
-- { 885, 885 },
-- { 900, 901 },
-- { 1014, 1014 },
-- { 1154, 1154 },
-- { 1542, 1544 },
-- { 1547, 1547 },
-- { 1550, 1551 },
-- { 1758, 1758 },
-- { 1769, 1769 },
-- { 1789, 1790 },
-- { 2038, 2038 },
-- { 2546, 2547 },
-- { 2554, 2555 },
-- { 2801, 2801 },
-- { 2928, 2928 },
-- { 3059, 3066 },
-- { 3199, 3199 },
-- { 3449, 3449 },
-- { 3647, 3647 },
-- { 3841, 3843 },
-- { 3859, 3863 },
-- { 3866, 3871 },
-- { 3892, 3892 },
-- { 3894, 3894 },
-- { 3896, 3896 },
-- { 4030, 4037 },
-- { 4039, 4044 },
-- { 4046, 4047 },
-- { 4053, 4056 },
-- { 4254, 4255 },
-- { 4960, 4960 },
-- { 5008, 5017 },
-- { 6107, 6107 },
-- { 6464, 6464 },
-- { 6622, 6655 },
-- { 7009, 7018 },
-- { 7028, 7036 },
-- { 8125, 8125 },
-- { 8127, 8129 },
-- { 8141, 8143 },
-- { 8157, 8159 },
-- { 8173, 8175 },
-- { 8189, 8190 },
-- { 8260, 8260 },
-- { 8274, 8274 },
-- { 8314, 8316 },
-- { 8330, 8332 },
-- { 8352, 8377 },
-- { 8448, 8449 },
-- { 8451, 8454 },
-- { 8456, 8457 },
-- { 8468, 8468 },
-- { 8470, 8472 },
-- { 8478, 8483 },
-- { 8485, 8485 },
-- { 8487, 8487 },
-- { 8489, 8489 },
-- { 8494, 8494 },
-- { 8506, 8507 },
-- { 8512, 8516 },
-- { 8522, 8525 },
-- { 8527, 8527 },
-- { 8592, 9000 },
-- { 9003, 9203 },
-- { 9216, 9254 },
-- { 9280, 9290 },
-- { 9372, 9449 },
-- { 9472, 9983 },
-- { 9985, 10087 },
-- { 10132, 10180 },
-- { 10183, 10186 },
-- { 10188, 10188 },
-- { 10190, 10213 },
-- { 10224, 10626 },
-- { 10649, 10711 },
-- { 10716, 10747 },
-- { 10750, 11084 },
-- { 11088, 11097 },
-- { 11493, 11498 },
-- { 11904, 11929 },
-- { 11931, 12019 },
-- { 12032, 12245 },
-- { 12272, 12283 },
-- { 12292, 12292 },
-- { 12306, 12307 },
-- { 12320, 12320 },
-- { 12342, 12343 },
-- { 12350, 12351 },
-- { 12443, 12444 },
-- { 12688, 12689 },
-- { 12694, 12703 },
-- { 12736, 12771 },
-- { 12800, 12830 },
-- { 12842, 12880 },
-- { 12896, 12927 },
-- { 12938, 12976 },
-- { 12992, 13054 },
-- { 13056, 13311 },
-- { 19904, 19967 },
-- { 42128, 42182 },
-- { 42752, 42774 },
-- { 42784, 42785 },
-- { 42889, 42890 },
-- { 43048, 43051 },
-- { 43062, 43065 },
-- { 43639, 43641 },
-- { 64297, 64297 },
-- { 64434, 64449 },
-- { 65020, 65021 },
-- { 65122, 65122 },
-- { 65124, 65126 },
-- { 65129, 65129 },
-- { 65284, 65284 },
-- { 65291, 65291 },
-- { 65308, 65310 },
-- { 65342, 65342 },
-- { 65344, 65344 },
-- { 65372, 65372 },
-- { 65374, 65374 },
-- { 65504, 65510 },
-- { 65512, 65518 },
-- { 65532, 65533 },
--};
--static URange32 S_range32[] = {
-- { 65794, 65794 },
-- { 65847, 65855 },
-- { 65913, 65929 },
-- { 65936, 65947 },
-- { 66000, 66044 },
-- { 118784, 119029 },
-- { 119040, 119078 },
-- { 119081, 119140 },
-- { 119146, 119148 },
-- { 119171, 119172 },
-- { 119180, 119209 },
-- { 119214, 119261 },
-- { 119296, 119361 },
-- { 119365, 119365 },
-- { 119552, 119638 },
-- { 120513, 120513 },
-- { 120539, 120539 },
-- { 120571, 120571 },
-- { 120597, 120597 },
-- { 120629, 120629 },
-- { 120655, 120655 },
-- { 120687, 120687 },
-- { 120713, 120713 },
-- { 120745, 120745 },
-- { 120771, 120771 },
-- { 126976, 127019 },
-- { 127024, 127123 },
-- { 127136, 127150 },
-- { 127153, 127166 },
-- { 127169, 127183 },
-- { 127185, 127199 },
-- { 127248, 127278 },
-- { 127280, 127337 },
-- { 127344, 127386 },
-- { 127462, 127490 },
-- { 127504, 127546 },
-- { 127552, 127560 },
-- { 127568, 127569 },
-- { 127744, 127776 },
-- { 127792, 127797 },
-- { 127799, 127868 },
-- { 127872, 127891 },
-- { 127904, 127940 },
-- { 127942, 127946 },
-- { 127968, 127984 },
-- { 128000, 128062 },
-- { 128064, 128064 },
-- { 128066, 128247 },
-- { 128249, 128252 },
-- { 128256, 128317 },
-- { 128336, 128359 },
-- { 128507, 128511 },
-- { 128513, 128528 },
-- { 128530, 128532 },
-- { 128534, 128534 },
-- { 128536, 128536 },
-- { 128538, 128538 },
-- { 128540, 128542 },
-- { 128544, 128549 },
-- { 128552, 128555 },
-- { 128557, 128557 },
-- { 128560, 128563 },
-- { 128565, 128576 },
-- { 128581, 128591 },
-- { 128640, 128709 },
-- { 128768, 128883 },
--};
--static URange16 So_range16[] = {
-- { 166, 167 },
-- { 169, 169 },
-- { 174, 174 },
-- { 176, 176 },
-- { 182, 182 },
-- { 1154, 1154 },
-- { 1550, 1551 },
-- { 1758, 1758 },
-- { 1769, 1769 },
-- { 1789, 1790 },
-- { 2038, 2038 },
-- { 2554, 2554 },
-- { 2928, 2928 },
-- { 3059, 3064 },
-- { 3066, 3066 },
-- { 3199, 3199 },
-- { 3449, 3449 },
-- { 3841, 3843 },
-- { 3859, 3863 },
-- { 3866, 3871 },
-- { 3892, 3892 },
-- { 3894, 3894 },
-- { 3896, 3896 },
-- { 4030, 4037 },
-- { 4039, 4044 },
-- { 4046, 4047 },
-- { 4053, 4056 },
-- { 4254, 4255 },
-- { 4960, 4960 },
-- { 5008, 5017 },
-- { 6464, 6464 },
-- { 6622, 6655 },
-- { 7009, 7018 },
-- { 7028, 7036 },
-- { 8448, 8449 },
-- { 8451, 8454 },
-- { 8456, 8457 },
-- { 8468, 8468 },
-- { 8470, 8471 },
-- { 8478, 8483 },
-- { 8485, 8485 },
-- { 8487, 8487 },
-- { 8489, 8489 },
-- { 8494, 8494 },
-- { 8506, 8507 },
-- { 8522, 8522 },
-- { 8524, 8525 },
-- { 8527, 8527 },
-- { 8597, 8601 },
-- { 8604, 8607 },
-- { 8609, 8610 },
-- { 8612, 8613 },
-- { 8615, 8621 },
-- { 8623, 8653 },
-- { 8656, 8657 },
-- { 8659, 8659 },
-- { 8661, 8691 },
-- { 8960, 8967 },
-- { 8972, 8991 },
-- { 8994, 9000 },
-- { 9003, 9083 },
-- { 9085, 9114 },
-- { 9140, 9179 },
-- { 9186, 9203 },
-- { 9216, 9254 },
-- { 9280, 9290 },
-- { 9372, 9449 },
-- { 9472, 9654 },
-- { 9656, 9664 },
-- { 9666, 9719 },
-- { 9728, 9838 },
-- { 9840, 9983 },
-- { 9985, 10087 },
-- { 10132, 10175 },
-- { 10240, 10495 },
-- { 11008, 11055 },
-- { 11077, 11078 },
-- { 11088, 11097 },
-- { 11493, 11498 },
-- { 11904, 11929 },
-- { 11931, 12019 },
-- { 12032, 12245 },
-- { 12272, 12283 },
-- { 12292, 12292 },
-- { 12306, 12307 },
-- { 12320, 12320 },
-- { 12342, 12343 },
-- { 12350, 12351 },
-- { 12688, 12689 },
-- { 12694, 12703 },
-- { 12736, 12771 },
-- { 12800, 12830 },
-- { 12842, 12880 },
-- { 12896, 12927 },
-- { 12938, 12976 },
-- { 12992, 13054 },
-- { 13056, 13311 },
-- { 19904, 19967 },
-- { 42128, 42182 },
-- { 43048, 43051 },
-- { 43062, 43063 },
-- { 43065, 43065 },
-- { 43639, 43641 },
-- { 65021, 65021 },
-- { 65508, 65508 },
-- { 65512, 65512 },
-- { 65517, 65518 },
-- { 65532, 65533 },
--};
--static URange32 So_range32[] = {
-- { 65794, 65794 },
-- { 65847, 65855 },
-- { 65913, 65929 },
-- { 65936, 65947 },
-- { 66000, 66044 },
-- { 118784, 119029 },
-- { 119040, 119078 },
-- { 119081, 119140 },
-- { 119146, 119148 },
-- { 119171, 119172 },
-- { 119180, 119209 },
-- { 119214, 119261 },
-- { 119296, 119361 },
-- { 119365, 119365 },
-- { 119552, 119638 },
-- { 126976, 127019 },
-- { 127024, 127123 },
-- { 127136, 127150 },
-- { 127153, 127166 },
-- { 127169, 127183 },
-- { 127185, 127199 },
-- { 127248, 127278 },
-- { 127280, 127337 },
-- { 127344, 127386 },
-- { 127462, 127490 },
-- { 127504, 127546 },
-- { 127552, 127560 },
-- { 127568, 127569 },
-- { 127744, 127776 },
-- { 127792, 127797 },
-- { 127799, 127868 },
-- { 127872, 127891 },
-- { 127904, 127940 },
-- { 127942, 127946 },
-- { 127968, 127984 },
-- { 128000, 128062 },
-- { 128064, 128064 },
-- { 128066, 128247 },
-- { 128249, 128252 },
-- { 128256, 128317 },
-- { 128336, 128359 },
-- { 128507, 128511 },
-- { 128513, 128528 },
-- { 128530, 128532 },
-- { 128534, 128534 },
-- { 128536, 128536 },
-- { 128538, 128538 },
-- { 128540, 128542 },
-- { 128544, 128549 },
-- { 128552, 128555 },
-- { 128557, 128557 },
-- { 128560, 128563 },
-- { 128565, 128576 },
-- { 128581, 128591 },
-- { 128640, 128709 },
-- { 128768, 128883 },
--};
--static URange16 Sm_range16[] = {
-- { 43, 43 },
-- { 60, 62 },
-- { 124, 124 },
-- { 126, 126 },
-- { 172, 172 },
-- { 177, 177 },
-- { 215, 215 },
-- { 247, 247 },
-- { 1014, 1014 },
-- { 1542, 1544 },
-- { 8260, 8260 },
-- { 8274, 8274 },
-- { 8314, 8316 },
-- { 8330, 8332 },
-- { 8472, 8472 },
-- { 8512, 8516 },
-- { 8523, 8523 },
-- { 8592, 8596 },
-- { 8602, 8603 },
-- { 8608, 8608 },
-- { 8611, 8611 },
-- { 8614, 8614 },
-- { 8622, 8622 },
-- { 8654, 8655 },
-- { 8658, 8658 },
-- { 8660, 8660 },
-- { 8692, 8959 },
-- { 8968, 8971 },
-- { 8992, 8993 },
-- { 9084, 9084 },
-- { 9115, 9139 },
-- { 9180, 9185 },
-- { 9655, 9655 },
-- { 9665, 9665 },
-- { 9720, 9727 },
-- { 9839, 9839 },
-- { 10176, 10180 },
-- { 10183, 10186 },
-- { 10188, 10188 },
-- { 10190, 10213 },
-- { 10224, 10239 },
-- { 10496, 10626 },
-- { 10649, 10711 },
-- { 10716, 10747 },
-- { 10750, 11007 },
-- { 11056, 11076 },
-- { 11079, 11084 },
-- { 64297, 64297 },
-- { 65122, 65122 },
-- { 65124, 65126 },
-- { 65291, 65291 },
-- { 65308, 65310 },
-- { 65372, 65372 },
-- { 65374, 65374 },
-- { 65506, 65506 },
-- { 65513, 65516 },
--};
--static URange32 Sm_range32[] = {
-- { 120513, 120513 },
-- { 120539, 120539 },
-- { 120571, 120571 },
-- { 120597, 120597 },
-- { 120629, 120629 },
-- { 120655, 120655 },
-- { 120687, 120687 },
-- { 120713, 120713 },
-- { 120745, 120745 },
-- { 120771, 120771 },
--};
--static URange16 Sc_range16[] = {
-- { 36, 36 },
-- { 162, 165 },
-- { 1547, 1547 },
-- { 2546, 2547 },
-- { 2555, 2555 },
-- { 2801, 2801 },
-- { 3065, 3065 },
-- { 3647, 3647 },
-- { 6107, 6107 },
-- { 8352, 8377 },
-- { 43064, 43064 },
-- { 65020, 65020 },
-- { 65129, 65129 },
-- { 65284, 65284 },
-- { 65504, 65505 },
-- { 65509, 65510 },
--};
--static URange16 Z_range16[] = {
-- { 32, 32 },
-- { 160, 160 },
-- { 5760, 5760 },
-- { 6158, 6158 },
-- { 8192, 8202 },
-- { 8232, 8233 },
-- { 8239, 8239 },
-- { 8287, 8287 },
-- { 12288, 12288 },
--};
--static URange16 Zl_range16[] = {
-- { 8232, 8232 },
--};
--static URange16 Co_range16[] = {
-- { 57344, 63743 },
--};
--static URange32 Co_range32[] = {
-- { 983040, 1048573 },
-- { 1048576, 1114109 },
--};
--static URange16 Cc_range16[] = {
-- { 0, 31 },
-- { 127, 159 },
--};
--static URange16 Cf_range16[] = {
-- { 173, 173 },
-- { 1536, 1539 },
-- { 1757, 1757 },
-- { 1807, 1807 },
-- { 6068, 6069 },
-- { 8203, 8207 },
-- { 8234, 8238 },
-- { 8288, 8292 },
-- { 8298, 8303 },
-- { 65279, 65279 },
-- { 65529, 65531 },
--};
--static URange32 Cf_range32[] = {
-- { 69821, 69821 },
-- { 119155, 119162 },
-- { 917505, 917505 },
-- { 917536, 917631 },
--};
--static URange16 Cs_range16[] = {
-- { 55296, 57343 },
--};
--static URange16 Zp_range16[] = {
-- { 8233, 8233 },
--};
--static URange16 Zs_range16[] = {
-- { 32, 32 },
-- { 160, 160 },
-- { 5760, 5760 },
-- { 6158, 6158 },
-- { 8192, 8202 },
-- { 8239, 8239 },
-- { 8287, 8287 },
-- { 12288, 12288 },
--};
--static URange16 Thaana_range16[] = {
-- { 1920, 1969 },
--};
--static URange16 Telugu_range16[] = {
-- { 3073, 3075 },
-- { 3077, 3084 },
-- { 3086, 3088 },
-- { 3090, 3112 },
-- { 3114, 3123 },
-- { 3125, 3129 },
-- { 3133, 3140 },
-- { 3142, 3144 },
-- { 3146, 3149 },
-- { 3157, 3158 },
-- { 3160, 3161 },
-- { 3168, 3171 },
-- { 3174, 3183 },
-- { 3192, 3199 },
--};
--static URange16 Cyrillic_range16[] = {
-- { 1024, 1156 },
-- { 1159, 1319 },
-- { 7467, 7467 },
-- { 7544, 7544 },
-- { 11744, 11775 },
-- { 42560, 42611 },
-- { 42620, 42647 },
--};
--static URange16 Hangul_range16[] = {
-- { 4352, 4607 },
-- { 12334, 12335 },
-- { 12593, 12686 },
-- { 12800, 12830 },
-- { 12896, 12926 },
-- { 43360, 43388 },
-- { 44032, 55203 },
-- { 55216, 55238 },
-- { 55243, 55291 },
-- { 65440, 65470 },
-- { 65474, 65479 },
-- { 65482, 65487 },
-- { 65490, 65495 },
-- { 65498, 65500 },
--};
--static URange32 Old_South_Arabian_range32[] = {
-- { 68192, 68223 },
--};
--static URange16 Ethiopic_range16[] = {
-- { 4608, 4680 },
-- { 4682, 4685 },
-- { 4688, 4694 },
-- { 4696, 4696 },
-- { 4698, 4701 },
-- { 4704, 4744 },
-- { 4746, 4749 },
-- { 4752, 4784 },
-- { 4786, 4789 },
-- { 4792, 4798 },
-- { 4800, 4800 },
-- { 4802, 4805 },
-- { 4808, 4822 },
-- { 4824, 4880 },
-- { 4882, 4885 },
-- { 4888, 4954 },
-- { 4957, 4988 },
-- { 4992, 5017 },
-- { 11648, 11670 },
-- { 11680, 11686 },
-- { 11688, 11694 },
-- { 11696, 11702 },
-- { 11704, 11710 },
-- { 11712, 11718 },
-- { 11720, 11726 },
-- { 11728, 11734 },
-- { 11736, 11742 },
-- { 43777, 43782 },
-- { 43785, 43790 },
-- { 43793, 43798 },
-- { 43808, 43814 },
-- { 43816, 43822 },
--};
--static URange16 Inherited_range16[] = {
-- { 768, 879 },
-- { 1157, 1158 },
-- { 1611, 1621 },
-- { 1631, 1631 },
-- { 1648, 1648 },
-- { 2385, 2386 },
-- { 7376, 7378 },
-- { 7380, 7392 },
-- { 7394, 7400 },
-- { 7405, 7405 },
-- { 7616, 7654 },
-- { 7676, 7679 },
-- { 8204, 8205 },
-- { 8400, 8432 },
-- { 12330, 12333 },
-- { 12441, 12442 },
-- { 65024, 65039 },
-- { 65056, 65062 },
--};
--static URange32 Inherited_range32[] = {
-- { 66045, 66045 },
-- { 119143, 119145 },
-- { 119163, 119170 },
-- { 119173, 119179 },
-- { 119210, 119213 },
-- { 917760, 917999 },
--};
--static URange16 Han_range16[] = {
-- { 11904, 11929 },
-- { 11931, 12019 },
-- { 12032, 12245 },
-- { 12293, 12293 },
-- { 12295, 12295 },
-- { 12321, 12329 },
-- { 12344, 12347 },
-- { 13312, 19893 },
-- { 19968, 40907 },
-- { 63744, 64045 },
-- { 64048, 64109 },
-- { 64112, 64217 },
--};
--static URange32 Han_range32[] = {
-- { 131072, 173782 },
-- { 173824, 177972 },
-- { 177984, 178205 },
-- { 194560, 195101 },
--};
--static URange16 Armenian_range16[] = {
-- { 1329, 1366 },
-- { 1369, 1375 },
-- { 1377, 1415 },
-- { 1418, 1418 },
-- { 64275, 64279 },
--};
--static URange16 Tamil_range16[] = {
-- { 2946, 2947 },
-- { 2949, 2954 },
-- { 2958, 2960 },
-- { 2962, 2965 },
-- { 2969, 2970 },
-- { 2972, 2972 },
-- { 2974, 2975 },
-- { 2979, 2980 },
-- { 2984, 2986 },
-- { 2990, 3001 },
-- { 3006, 3010 },
-- { 3014, 3016 },
-- { 3018, 3021 },
-- { 3024, 3024 },
-- { 3031, 3031 },
-- { 3046, 3066 },
--};
--static URange16 Bopomofo_range16[] = {
-- { 746, 747 },
-- { 12549, 12589 },
-- { 12704, 12730 },
--};
--static URange16 Sundanese_range16[] = {
-- { 7040, 7082 },
-- { 7086, 7097 },
--};
--static URange16 Tagalog_range16[] = {
-- { 5888, 5900 },
-- { 5902, 5908 },
--};
--static URange16 Malayalam_range16[] = {
-- { 3330, 3331 },
-- { 3333, 3340 },
-- { 3342, 3344 },
-- { 3346, 3386 },
-- { 3389, 3396 },
-- { 3398, 3400 },
-- { 3402, 3406 },
-- { 3415, 3415 },
-- { 3424, 3427 },
-- { 3430, 3445 },
-- { 3449, 3455 },
--};
--static URange32 Carian_range32[] = {
-- { 66208, 66256 },
--};
--static URange16 Hiragana_range16[] = {
-- { 12353, 12438 },
-- { 12445, 12447 },
--};
--static URange32 Hiragana_range32[] = {
-- { 110593, 110593 },
-- { 127488, 127488 },
--};
--static URange16 Tagbanwa_range16[] = {
-- { 5984, 5996 },
-- { 5998, 6000 },
-- { 6002, 6003 },
--};
--static URange16 Meetei_Mayek_range16[] = {
-- { 43968, 44013 },
-- { 44016, 44025 },
--};
--static URange16 Tai_Le_range16[] = {
-- { 6480, 6509 },
-- { 6512, 6516 },
--};
--static URange16 Kayah_Li_range16[] = {
-- { 43264, 43311 },
--};
--static URange16 Buginese_range16[] = {
-- { 6656, 6683 },
-- { 6686, 6687 },
--};
--static URange32 Kharoshthi_range32[] = {
-- { 68096, 68099 },
-- { 68101, 68102 },
-- { 68108, 68115 },
-- { 68117, 68119 },
-- { 68121, 68147 },
-- { 68152, 68154 },
-- { 68159, 68167 },
-- { 68176, 68184 },
--};
--static URange16 Tai_Tham_range16[] = {
-- { 6688, 6750 },
-- { 6752, 6780 },
-- { 6783, 6793 },
-- { 6800, 6809 },
-- { 6816, 6829 },
--};
--static URange32 Old_Italic_range32[] = {
-- { 66304, 66334 },
-- { 66336, 66339 },
--};
--static URange32 Old_Persian_range32[] = {
-- { 66464, 66499 },
-- { 66504, 66517 },
--};
--static URange16 Latin_range16[] = {
-- { 65, 90 },
-- { 97, 122 },
-- { 170, 170 },
-- { 186, 186 },
-- { 192, 214 },
-- { 216, 246 },
-- { 248, 696 },
-- { 736, 740 },
-- { 7424, 7461 },
-- { 7468, 7516 },
-- { 7522, 7525 },
-- { 7531, 7543 },
-- { 7545, 7614 },
-- { 7680, 7935 },
-- { 8305, 8305 },
-- { 8319, 8319 },
-- { 8336, 8348 },
-- { 8490, 8491 },
-- { 8498, 8498 },
-- { 8526, 8526 },
-- { 8544, 8584 },
-- { 11360, 11391 },
-- { 42786, 42887 },
-- { 42891, 42894 },
-- { 42896, 42897 },
-- { 42912, 42921 },
-- { 43002, 43007 },
-- { 64256, 64262 },
-- { 65313, 65338 },
-- { 65345, 65370 },
--};
--static URange16 Saurashtra_range16[] = {
-- { 43136, 43204 },
-- { 43214, 43225 },
--};
--static URange32 Shavian_range32[] = {
-- { 66640, 66687 },
--};
--static URange16 Georgian_range16[] = {
-- { 4256, 4293 },
-- { 4304, 4346 },
-- { 4348, 4348 },
-- { 11520, 11557 },
--};
--static URange16 Batak_range16[] = {
-- { 7104, 7155 },
-- { 7164, 7167 },
--};
--static URange16 Devanagari_range16[] = {
-- { 2304, 2384 },
-- { 2387, 2403 },
-- { 2406, 2415 },
-- { 2417, 2423 },
-- { 2425, 2431 },
-- { 43232, 43259 },
--};
--static URange16 Thai_range16[] = {
-- { 3585, 3642 },
-- { 3648, 3675 },
--};
--static URange16 Tibetan_range16[] = {
-- { 3840, 3911 },
-- { 3913, 3948 },
-- { 3953, 3991 },
-- { 3993, 4028 },
-- { 4030, 4044 },
-- { 4046, 4052 },
-- { 4057, 4058 },
--};
--static URange16 Tifinagh_range16[] = {
-- { 11568, 11621 },
-- { 11631, 11632 },
-- { 11647, 11647 },
--};
--static URange32 Ugaritic_range32[] = {
-- { 66432, 66461 },
-- { 66463, 66463 },
--};
--static URange16 Braille_range16[] = {
-- { 10240, 10495 },
--};
--static URange16 Greek_range16[] = {
-- { 880, 883 },
-- { 885, 887 },
-- { 890, 893 },
-- { 900, 900 },
-- { 902, 902 },
-- { 904, 906 },
-- { 908, 908 },
-- { 910, 929 },
-- { 931, 993 },
-- { 1008, 1023 },
-- { 7462, 7466 },
-- { 7517, 7521 },
-- { 7526, 7530 },
-- { 7615, 7615 },
-- { 7936, 7957 },
-- { 7960, 7965 },
-- { 7968, 8005 },
-- { 8008, 8013 },
-- { 8016, 8023 },
-- { 8025, 8025 },
-- { 8027, 8027 },
-- { 8029, 8029 },
-- { 8031, 8061 },
-- { 8064, 8116 },
-- { 8118, 8132 },
-- { 8134, 8147 },
-- { 8150, 8155 },
-- { 8157, 8175 },
-- { 8178, 8180 },
-- { 8182, 8190 },
-- { 8486, 8486 },
--};
--static URange32 Greek_range32[] = {
-- { 65856, 65930 },
-- { 119296, 119365 },
--};
--static URange32 Lycian_range32[] = {
-- { 66176, 66204 },
--};
--static URange16 Tai_Viet_range16[] = {
-- { 43648, 43714 },
-- { 43739, 43743 },
--};
--static URange16 Vai_range16[] = {
-- { 42240, 42539 },
--};
--static URange16 Ogham_range16[] = {
-- { 5760, 5788 },
--};
--static URange32 Inscriptional_Parthian_range32[] = {
-- { 68416, 68437 },
-- { 68440, 68447 },
--};
--static URange16 Cham_range16[] = {
-- { 43520, 43574 },
-- { 43584, 43597 },
-- { 43600, 43609 },
-- { 43612, 43615 },
--};
--static URange16 Syriac_range16[] = {
-- { 1792, 1805 },
-- { 1807, 1866 },
-- { 1869, 1871 },
--};
--static URange16 Runic_range16[] = {
-- { 5792, 5866 },
-- { 5870, 5872 },
--};
--static URange32 Gothic_range32[] = {
-- { 66352, 66378 },
--};
--static URange16 Katakana_range16[] = {
-- { 12449, 12538 },
-- { 12541, 12543 },
-- { 12784, 12799 },
-- { 13008, 13054 },
-- { 13056, 13143 },
-- { 65382, 65391 },
-- { 65393, 65437 },
--};
--static URange32 Katakana_range32[] = {
-- { 110592, 110592 },
--};
--static URange32 Osmanya_range32[] = {
-- { 66688, 66717 },
-- { 66720, 66729 },
--};
--static URange16 New_Tai_Lue_range16[] = {
-- { 6528, 6571 },
-- { 6576, 6601 },
-- { 6608, 6618 },
-- { 6622, 6623 },
--};
--static URange16 Ol_Chiki_range16[] = {
-- { 7248, 7295 },
--};
--static URange16 Limbu_range16[] = {
-- { 6400, 6428 },
-- { 6432, 6443 },
-- { 6448, 6459 },
-- { 6464, 6464 },
-- { 6468, 6479 },
--};
--static URange16 Cherokee_range16[] = {
-- { 5024, 5108 },
--};
--static URange16 Oriya_range16[] = {
-- { 2817, 2819 },
-- { 2821, 2828 },
-- { 2831, 2832 },
-- { 2835, 2856 },
-- { 2858, 2864 },
-- { 2866, 2867 },
-- { 2869, 2873 },
-- { 2876, 2884 },
-- { 2887, 2888 },
-- { 2891, 2893 },
-- { 2902, 2903 },
-- { 2908, 2909 },
-- { 2911, 2915 },
-- { 2918, 2935 },
--};
--static URange16 Gujarati_range16[] = {
-- { 2689, 2691 },
-- { 2693, 2701 },
-- { 2703, 2705 },
-- { 2707, 2728 },
-- { 2730, 2736 },
-- { 2738, 2739 },
-- { 2741, 2745 },
-- { 2748, 2757 },
-- { 2759, 2761 },
-- { 2763, 2765 },
-- { 2768, 2768 },
-- { 2784, 2787 },
-- { 2790, 2799 },
-- { 2801, 2801 },
--};
--static URange32 Inscriptional_Pahlavi_range32[] = {
-- { 68448, 68466 },
-- { 68472, 68479 },
--};
--static URange16 Khmer_range16[] = {
-- { 6016, 6109 },
-- { 6112, 6121 },
-- { 6128, 6137 },
-- { 6624, 6655 },
--};
--static URange32 Cuneiform_range32[] = {
-- { 73728, 74606 },
-- { 74752, 74850 },
-- { 74864, 74867 },
--};
--static URange16 Mandaic_range16[] = {
-- { 2112, 2139 },
-- { 2142, 2142 },
--};
--static URange16 Syloti_Nagri_range16[] = {
-- { 43008, 43051 },
--};
--static URange16 Nko_range16[] = {
-- { 1984, 2042 },
--};
--static URange16 Canadian_Aboriginal_range16[] = {
-- { 5120, 5759 },
-- { 6320, 6389 },
--};
--static URange32 Phoenician_range32[] = {
-- { 67840, 67867 },
-- { 67871, 67871 },
--};
--static URange16 Bengali_range16[] = {
-- { 2433, 2435 },
-- { 2437, 2444 },
-- { 2447, 2448 },
-- { 2451, 2472 },
-- { 2474, 2480 },
-- { 2482, 2482 },
-- { 2486, 2489 },
-- { 2492, 2500 },
-- { 2503, 2504 },
-- { 2507, 2510 },
-- { 2519, 2519 },
-- { 2524, 2525 },
-- { 2527, 2531 },
-- { 2534, 2555 },
--};
--static URange32 Kaithi_range32[] = {
-- { 69760, 69825 },
--};
--static URange16 Glagolitic_range16[] = {
-- { 11264, 11310 },
-- { 11312, 11358 },
--};
--static URange32 Imperial_Aramaic_range32[] = {
-- { 67648, 67669 },
-- { 67671, 67679 },
--};
--static URange16 Gurmukhi_range16[] = {
-- { 2561, 2563 },
-- { 2565, 2570 },
-- { 2575, 2576 },
-- { 2579, 2600 },
-- { 2602, 2608 },
-- { 2610, 2611 },
-- { 2613, 2614 },
-- { 2616, 2617 },
-- { 2620, 2620 },
-- { 2622, 2626 },
-- { 2631, 2632 },
-- { 2635, 2637 },
-- { 2641, 2641 },
-- { 2649, 2652 },
-- { 2654, 2654 },
-- { 2662, 2677 },
--};
--static URange16 Javanese_range16[] = {
-- { 43392, 43469 },
-- { 43471, 43481 },
-- { 43486, 43487 },
--};
--static URange16 Phags_Pa_range16[] = {
-- { 43072, 43127 },
--};
--static URange32 Cypriot_range32[] = {
-- { 67584, 67589 },
-- { 67592, 67592 },
-- { 67594, 67637 },
-- { 67639, 67640 },
-- { 67644, 67644 },
-- { 67647, 67647 },
--};
--static URange16 Kannada_range16[] = {
-- { 3202, 3203 },
-- { 3205, 3212 },
-- { 3214, 3216 },
-- { 3218, 3240 },
-- { 3242, 3251 },
-- { 3253, 3257 },
-- { 3260, 3268 },
-- { 3270, 3272 },
-- { 3274, 3277 },
-- { 3285, 3286 },
-- { 3294, 3294 },
-- { 3296, 3299 },
-- { 3302, 3311 },
-- { 3313, 3314 },
--};
--static URange16 Mongolian_range16[] = {
-- { 6144, 6145 },
-- { 6148, 6148 },
-- { 6150, 6158 },
-- { 6160, 6169 },
-- { 6176, 6263 },
-- { 6272, 6314 },
--};
--static URange16 Sinhala_range16[] = {
-- { 3458, 3459 },
-- { 3461, 3478 },
-- { 3482, 3505 },
-- { 3507, 3515 },
-- { 3517, 3517 },
-- { 3520, 3526 },
-- { 3530, 3530 },
-- { 3535, 3540 },
-- { 3542, 3542 },
-- { 3544, 3551 },
-- { 3570, 3572 },
--};
--static URange32 Brahmi_range32[] = {
-- { 69632, 69709 },
-- { 69714, 69743 },
--};
--static URange32 Deseret_range32[] = {
-- { 66560, 66639 },
--};
--static URange16 Rejang_range16[] = {
-- { 43312, 43347 },
-- { 43359, 43359 },
--};
--static URange16 Yi_range16[] = {
-- { 40960, 42124 },
-- { 42128, 42182 },
--};
--static URange16 Balinese_range16[] = {
-- { 6912, 6987 },
-- { 6992, 7036 },
--};
--static URange16 Lao_range16[] = {
-- { 3713, 3714 },
-- { 3716, 3716 },
-- { 3719, 3720 },
-- { 3722, 3722 },
-- { 3725, 3725 },
-- { 3732, 3735 },
-- { 3737, 3743 },
-- { 3745, 3747 },
-- { 3749, 3749 },
-- { 3751, 3751 },
-- { 3754, 3755 },
-- { 3757, 3769 },
-- { 3771, 3773 },
-- { 3776, 3780 },
-- { 3782, 3782 },
-- { 3784, 3789 },
-- { 3792, 3801 },
-- { 3804, 3805 },
--};
--static URange16 Hanunoo_range16[] = {
-- { 5920, 5940 },
--};
--static URange32 Linear_B_range32[] = {
-- { 65536, 65547 },
-- { 65549, 65574 },
-- { 65576, 65594 },
-- { 65596, 65597 },
-- { 65599, 65613 },
-- { 65616, 65629 },
-- { 65664, 65786 },
--};
--static URange32 Old_Turkic_range32[] = {
-- { 68608, 68680 },
--};
--static URange16 Lepcha_range16[] = {
-- { 7168, 7223 },
-- { 7227, 7241 },
-- { 7245, 7247 },
--};
--static URange32 Lydian_range32[] = {
-- { 67872, 67897 },
-- { 67903, 67903 },
--};
--static URange32 Egyptian_Hieroglyphs_range32[] = {
-- { 77824, 78894 },
--};
--static URange16 Samaritan_range16[] = {
-- { 2048, 2093 },
-- { 2096, 2110 },
--};
--static URange16 Lisu_range16[] = {
-- { 42192, 42239 },
--};
--static URange16 Buhid_range16[] = {
-- { 5952, 5971 },
--};
--static URange16 Common_range16[] = {
-- { 0, 64 },
-- { 91, 96 },
-- { 123, 169 },
-- { 171, 185 },
-- { 187, 191 },
-- { 215, 215 },
-- { 247, 247 },
-- { 697, 735 },
-- { 741, 745 },
-- { 748, 767 },
-- { 884, 884 },
-- { 894, 894 },
-- { 901, 901 },
-- { 903, 903 },
-- { 1417, 1417 },
-- { 1548, 1548 },
-- { 1563, 1563 },
-- { 1567, 1567 },
-- { 1600, 1600 },
-- { 1632, 1641 },
-- { 1757, 1757 },
-- { 2404, 2405 },
-- { 2416, 2416 },
-- { 3647, 3647 },
-- { 4053, 4056 },
-- { 4347, 4347 },
-- { 5867, 5869 },
-- { 5941, 5942 },
-- { 6146, 6147 },
-- { 6149, 6149 },
-- { 7379, 7379 },
-- { 7393, 7393 },
-- { 7401, 7404 },
-- { 7406, 7410 },
-- { 8192, 8203 },
-- { 8206, 8292 },
-- { 8298, 8304 },
-- { 8308, 8318 },
-- { 8320, 8334 },
-- { 8352, 8377 },
-- { 8448, 8485 },
-- { 8487, 8489 },
-- { 8492, 8497 },
-- { 8499, 8525 },
-- { 8527, 8543 },
-- { 8585, 8585 },
-- { 8592, 9203 },
-- { 9216, 9254 },
-- { 9280, 9290 },
-- { 9312, 9983 },
-- { 9985, 10186 },
-- { 10188, 10188 },
-- { 10190, 10239 },
-- { 10496, 11084 },
-- { 11088, 11097 },
-- { 11776, 11825 },
-- { 12272, 12283 },
-- { 12288, 12292 },
-- { 12294, 12294 },
-- { 12296, 12320 },
-- { 12336, 12343 },
-- { 12348, 12351 },
-- { 12443, 12444 },
-- { 12448, 12448 },
-- { 12539, 12540 },
-- { 12688, 12703 },
-- { 12736, 12771 },
-- { 12832, 12895 },
-- { 12927, 13007 },
-- { 13144, 13311 },
-- { 19904, 19967 },
-- { 42752, 42785 },
-- { 42888, 42890 },
-- { 43056, 43065 },
-- { 64830, 64831 },
-- { 65021, 65021 },
-- { 65040, 65049 },
-- { 65072, 65106 },
-- { 65108, 65126 },
-- { 65128, 65131 },
-- { 65279, 65279 },
-- { 65281, 65312 },
-- { 65339, 65344 },
-- { 65371, 65381 },
-- { 65392, 65392 },
-- { 65438, 65439 },
-- { 65504, 65510 },
-- { 65512, 65518 },
-- { 65529, 65533 },
--};
--static URange32 Common_range32[] = {
-- { 65792, 65794 },
-- { 65799, 65843 },
-- { 65847, 65855 },
-- { 65936, 65947 },
-- { 66000, 66044 },
-- { 118784, 119029 },
-- { 119040, 119078 },
-- { 119081, 119142 },
-- { 119146, 119162 },
-- { 119171, 119172 },
-- { 119180, 119209 },
-- { 119214, 119261 },
-- { 119552, 119638 },
-- { 119648, 119665 },
-- { 119808, 119892 },
-- { 119894, 119964 },
-- { 119966, 119967 },
-- { 119970, 119970 },
-- { 119973, 119974 },
-- { 119977, 119980 },
-- { 119982, 119993 },
-- { 119995, 119995 },
-- { 119997, 120003 },
-- { 120005, 120069 },
-- { 120071, 120074 },
-- { 120077, 120084 },
-- { 120086, 120092 },
-- { 120094, 120121 },
-- { 120123, 120126 },
-- { 120128, 120132 },
-- { 120134, 120134 },
-- { 120138, 120144 },
-- { 120146, 120485 },
-- { 120488, 120779 },
-- { 120782, 120831 },
-- { 126976, 127019 },
-- { 127024, 127123 },
-- { 127136, 127150 },
-- { 127153, 127166 },
-- { 127169, 127183 },
-- { 127185, 127199 },
-- { 127232, 127242 },
-- { 127248, 127278 },
-- { 127280, 127337 },
-- { 127344, 127386 },
-- { 127462, 127487 },
-- { 127489, 127490 },
-- { 127504, 127546 },
-- { 127552, 127560 },
-- { 127568, 127569 },
-- { 127744, 127776 },
-- { 127792, 127797 },
-- { 127799, 127868 },
-- { 127872, 127891 },
-- { 127904, 127940 },
-- { 127942, 127946 },
-- { 127968, 127984 },
-- { 128000, 128062 },
-- { 128064, 128064 },
-- { 128066, 128247 },
-- { 128249, 128252 },
-- { 128256, 128317 },
-- { 128336, 128359 },
-- { 128507, 128511 },
-- { 128513, 128528 },
-- { 128530, 128532 },
-- { 128534, 128534 },
-- { 128536, 128536 },
-- { 128538, 128538 },
-- { 128540, 128542 },
-- { 128544, 128549 },
-- { 128552, 128555 },
-- { 128557, 128557 },
-- { 128560, 128563 },
-- { 128565, 128576 },
-- { 128581, 128591 },
-- { 128640, 128709 },
-- { 128768, 128883 },
-- { 917505, 917505 },
-- { 917536, 917631 },
--};
--static URange16 Coptic_range16[] = {
-- { 994, 1007 },
-- { 11392, 11505 },
-- { 11513, 11519 },
--};
--static URange16 Arabic_range16[] = {
-- { 1536, 1539 },
-- { 1542, 1547 },
-- { 1549, 1562 },
-- { 1566, 1566 },
-- { 1568, 1599 },
-- { 1601, 1610 },
-- { 1622, 1630 },
-- { 1642, 1647 },
-- { 1649, 1756 },
-- { 1758, 1791 },
-- { 1872, 1919 },
-- { 64336, 64449 },
-- { 64467, 64829 },
-- { 64848, 64911 },
-- { 64914, 64967 },
-- { 65008, 65020 },
-- { 65136, 65140 },
-- { 65142, 65276 },
--};
--static URange32 Arabic_range32[] = {
-- { 69216, 69246 },
--};
--static URange16 Bamum_range16[] = {
-- { 42656, 42743 },
--};
--static URange32 Bamum_range32[] = {
-- { 92160, 92728 },
--};
--static URange16 Myanmar_range16[] = {
-- { 4096, 4255 },
-- { 43616, 43643 },
--};
--static URange32 Avestan_range32[] = {
-- { 68352, 68405 },
-- { 68409, 68415 },
--};
--static URange16 Hebrew_range16[] = {
-- { 1425, 1479 },
-- { 1488, 1514 },
-- { 1520, 1524 },
-- { 64285, 64310 },
-- { 64312, 64316 },
-- { 64318, 64318 },
-- { 64320, 64321 },
-- { 64323, 64324 },
-- { 64326, 64335 },
--};
--// 3804 16-bit ranges, 582 32-bit ranges
--UGroup unicode_groups[] = {
-- { "Arabic", +1, Arabic_range16, 18, Arabic_range32, 1 },
-- { "Armenian", +1, Armenian_range16, 5, 0, 0 },
-- { "Avestan", +1, 0, 0, Avestan_range32, 2 },
-- { "Balinese", +1, Balinese_range16, 2, 0, 0 },
-- { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 },
-- { "Batak", +1, Batak_range16, 2, 0, 0 },
-- { "Bengali", +1, Bengali_range16, 14, 0, 0 },
-- { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 },
-- { "Brahmi", +1, 0, 0, Brahmi_range32, 2 },
-- { "Braille", +1, Braille_range16, 1, 0, 0 },
-- { "Buginese", +1, Buginese_range16, 2, 0, 0 },
-- { "Buhid", +1, Buhid_range16, 1, 0, 0 },
-- { "C", +1, C_range16, 14, C_range32, 6 },
-- { "Canadian_Aboriginal", +1, Canadian_Aboriginal_range16, 2, 0, 0 },
-- { "Carian", +1, 0, 0, Carian_range32, 1 },
-- { "Cc", +1, Cc_range16, 2, 0, 0 },
-- { "Cf", +1, Cf_range16, 11, Cf_range32, 4 },
-- { "Cham", +1, Cham_range16, 4, 0, 0 },
-- { "Cherokee", +1, Cherokee_range16, 1, 0, 0 },
-- { "Co", +1, Co_range16, 1, Co_range32, 2 },
-- { "Common", +1, Common_range16, 89, Common_range32, 80 },
-- { "Coptic", +1, Coptic_range16, 3, 0, 0 },
-- { "Cs", +1, Cs_range16, 1, 0, 0 },
-- { "Cuneiform", +1, 0, 0, Cuneiform_range32, 3 },
-- { "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
-- { "Cyrillic", +1, Cyrillic_range16, 7, 0, 0 },
-- { "Deseret", +1, 0, 0, Deseret_range32, 1 },
-- { "Devanagari", +1, Devanagari_range16, 6, 0, 0 },
-- { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 1 },
-- { "Ethiopic", +1, Ethiopic_range16, 32, 0, 0 },
-- { "Georgian", +1, Georgian_range16, 4, 0, 0 },
-- { "Glagolitic", +1, Glagolitic_range16, 2, 0, 0 },
-- { "Gothic", +1, 0, 0, Gothic_range32, 1 },
-- { "Greek", +1, Greek_range16, 31, Greek_range32, 2 },
-- { "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
-- { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
-- { "Han", +1, Han_range16, 12, Han_range32, 4 },
-- { "Hangul", +1, Hangul_range16, 14, 0, 0 },
-- { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
-- { "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
-- { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 2 },
-- { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
-- { "Inherited", +1, Inherited_range16, 18, Inherited_range32, 6 },
-- { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
-- { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 },
-- { "Javanese", +1, Javanese_range16, 3, 0, 0 },
-- { "Kaithi", +1, 0, 0, Kaithi_range32, 1 },
-- { "Kannada", +1, Kannada_range16, 14, 0, 0 },
-- { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 1 },
-- { "Kayah_Li", +1, Kayah_Li_range16, 1, 0, 0 },
-- { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
-- { "Khmer", +1, Khmer_range16, 4, 0, 0 },
-- { "L", +1, L_range16, 362, L_range32, 73 },
-- { "Lao", +1, Lao_range16, 18, 0, 0 },
-- { "Latin", +1, Latin_range16, 30, 0, 0 },
-- { "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
-- { "Limbu", +1, Limbu_range16, 5, 0, 0 },
-- { "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
-- { "Lisu", +1, Lisu_range16, 1, 0, 0 },
-- { "Ll", +1, Ll_range16, 580, Ll_range32, 29 },
-- { "Lm", +1, Lm_range16, 49, 0, 0 },
-- { "Lo", +1, Lo_range16, 280, Lo_range32, 43 },
-- { "Lt", +1, Lt_range16, 10, 0, 0 },
-- { "Lu", +1, Lu_range16, 571, Lu_range32, 32 },
-- { "Lycian", +1, 0, 0, Lycian_range32, 1 },
-- { "Lydian", +1, 0, 0, Lydian_range32, 2 },
-- { "M", +1, M_range16, 176, M_range32, 17 },
-- { "Malayalam", +1, Malayalam_range16, 11, 0, 0 },
-- { "Mandaic", +1, Mandaic_range16, 2, 0, 0 },
-- { "Mc", +1, Mc_range16, 106, Mc_range32, 7 },
-- { "Me", +1, Me_range16, 4, 0, 0 },
-- { "Meetei_Mayek", +1, Meetei_Mayek_range16, 2, 0, 0 },
-- { "Mn", +1, Mn_range16, 186, Mn_range32, 17 },
-- { "Mongolian", +1, Mongolian_range16, 6, 0, 0 },
-- { "Myanmar", +1, Myanmar_range16, 2, 0, 0 },
-- { "N", +1, N_range16, 63, N_range32, 20 },
-- { "Nd", +1, Nd_range16, 35, Nd_range32, 3 },
-- { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
-- { "Nko", +1, Nko_range16, 1, 0, 0 },
-- { "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
-- { "No", +1, No_range16, 27, No_range32, 14 },
-- { "Ogham", +1, Ogham_range16, 1, 0, 0 },
-- { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 },
-- { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 },
-- { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 },
-- { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 },
-- { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 },
-- { "Oriya", +1, Oriya_range16, 14, 0, 0 },
-- { "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
-- { "P", +1, P_range16, 120, P_range32, 13 },
-- { "Pc", +1, Pc_range16, 6, 0, 0 },
-- { "Pd", +1, Pd_range16, 15, 0, 0 },
-- { "Pe", +1, Pe_range16, 70, 0, 0 },
-- { "Pf", +1, Pf_range16, 10, 0, 0 },
-- { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
-- { "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
-- { "Pi", +1, Pi_range16, 11, 0, 0 },
-- { "Po", +1, Po_range16, 115, Po_range32, 13 },
-- { "Ps", +1, Ps_range16, 72, 0, 0 },
-- { "Rejang", +1, Rejang_range16, 2, 0, 0 },
-- { "Runic", +1, Runic_range16, 2, 0, 0 },
-- { "S", +1, S_range16, 142, S_range32, 66 },
-- { "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
-- { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
-- { "Sc", +1, Sc_range16, 16, 0, 0 },
-- { "Shavian", +1, 0, 0, Shavian_range32, 1 },
-- { "Sinhala", +1, Sinhala_range16, 11, 0, 0 },
-- { "Sk", +1, Sk_range16, 27, 0, 0 },
-- { "Sm", +1, Sm_range16, 56, Sm_range32, 10 },
-- { "So", +1, So_range16, 108, So_range32, 56 },
-- { "Sundanese", +1, Sundanese_range16, 2, 0, 0 },
-- { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 },
-- { "Syriac", +1, Syriac_range16, 3, 0, 0 },
-- { "Tagalog", +1, Tagalog_range16, 2, 0, 0 },
-- { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 },
-- { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 },
-- { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 },
-- { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 },
-- { "Tamil", +1, Tamil_range16, 16, 0, 0 },
-- { "Telugu", +1, Telugu_range16, 14, 0, 0 },
-- { "Thaana", +1, Thaana_range16, 1, 0, 0 },
-- { "Thai", +1, Thai_range16, 2, 0, 0 },
-- { "Tibetan", +1, Tibetan_range16, 7, 0, 0 },
-- { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 },
-- { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 },
-- { "Vai", +1, Vai_range16, 1, 0, 0 },
-- { "Yi", +1, Yi_range16, 2, 0, 0 },
-- { "Z", +1, Z_range16, 9, 0, 0 },
-- { "Zl", +1, Zl_range16, 1, 0, 0 },
-- { "Zp", +1, Zp_range16, 1, 0, 0 },
-- { "Zs", +1, Zs_range16, 8, 0, 0 },
--};
--int num_unicode_groups = 131;
--
--
--} // namespace re2
--
--
-diff --git a/re2/re2/unicode_groups.h b/re2/re2/unicode_groups.h
-deleted file mode 100644
-index f91c51f..0000000
---- a/re2/re2/unicode_groups.h
-+++ /dev/null
-@@ -1,64 +0,0 @@
--// Copyright 2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Unicode character groups.
--
--// The codes get split into ranges of 16-bit codes
--// and ranges of 32-bit codes. It would be simpler
--// to use only 32-bit ranges, but these tables are large
--// enough to warrant extra care.
--//
--// Using just 32-bit ranges gives 27 kB of data.
--// Adding 16-bit ranges gives 18 kB of data.
--// Adding an extra table of 16-bit singletons would reduce
--// to 16.5 kB of data but make the data harder to use;
--// we don't bother.
--
--#ifndef RE2_UNICODE_GROUPS_H__
--#define RE2_UNICODE_GROUPS_H__
--
--#include "util/util.h"
--
--namespace re2 {
--
--struct URange16
--{
-- uint16 lo;
-- uint16 hi;
--};
--
--struct URange32
--{
-- uint32 lo;
-- uint32 hi;
--};
--
--struct UGroup
--{
-- const char *name;
-- int sign; // +1 for [abc], -1 for [^abc]
-- URange16 *r16;
-- int nr16;
-- URange32 *r32;
-- int nr32;
--};
--
--// Named by property or script name (e.g., "Nd", "N", "Han").
--// Negated groups are not included.
--extern UGroup unicode_groups[];
--extern int num_unicode_groups;
--
--// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
--// Negated groups are included.
--extern UGroup posix_groups[];
--extern int num_posix_groups;
--
--// Named by Perl name (e.g., "\\d", "\\D").
--// Negated groups are included.
--extern UGroup perl_groups[];
--extern int num_perl_groups;
--
--} // namespace re2
--
--#endif // RE2_UNICODE_GROUPS_H__
-diff --git a/re2/re2/variadic_function.h b/re2/re2/variadic_function.h
-deleted file mode 100644
-index 8d2b763..0000000
---- a/re2/re2/variadic_function.h
-+++ /dev/null
-@@ -1,346 +0,0 @@
--// Copyright 2010 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_VARIADIC_FUNCTION_H_
--#define RE2_VARIADIC_FUNCTION_H_
--
--namespace re2 {
--
--template <typename Result, typename Param0, typename Param1, typename Arg,
-- Result (*Func)(Param0, Param1, const Arg* const [], int count)>
--class VariadicFunction2 {
-- public:
-- VariadicFunction2() {}
--
-- Result operator()(Param0 p0, Param1 p1) const {
-- return Func(p0, p1, 0, 0);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0) const {
-- const Arg* const args[] = { &a0 };
-- return Func(p0, p1, args, 1);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1) const {
-- const Arg* const args[] = { &a0, &a1 };
-- return Func(p0, p1, args, 2);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2) const {
-- const Arg* const args[] = { &a0, &a1, &a2 };
-- return Func(p0, p1, args, 3);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3 };
-- return Func(p0, p1, args, 4);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4 };
-- return Func(p0, p1, args, 5);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5 };
-- return Func(p0, p1, args, 6);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6 };
-- return Func(p0, p1, args, 7);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7 };
-- return Func(p0, p1, args, 8);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8 };
-- return Func(p0, p1, args, 9);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9 };
-- return Func(p0, p1, args, 10);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10 };
-- return Func(p0, p1, args, 11);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11 };
-- return Func(p0, p1, args, 12);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12 };
-- return Func(p0, p1, args, 13);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13 };
-- return Func(p0, p1, args, 14);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14 };
-- return Func(p0, p1, args, 15);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15 };
-- return Func(p0, p1, args, 16);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16 };
-- return Func(p0, p1, args, 17);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17 };
-- return Func(p0, p1, args, 18);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18 };
-- return Func(p0, p1, args, 19);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19 };
-- return Func(p0, p1, args, 20);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19,
-- &a20 };
-- return Func(p0, p1, args, 21);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21 };
-- return Func(p0, p1, args, 22);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22 };
-- return Func(p0, p1, args, 23);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23 };
-- return Func(p0, p1, args, 24);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24 };
-- return Func(p0, p1, args, 25);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25 };
-- return Func(p0, p1, args, 26);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26 };
-- return Func(p0, p1, args, 27);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26, const Arg& a27) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26, &a27 };
-- return Func(p0, p1, args, 28);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26, const Arg& a27, const Arg& a28) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28 };
-- return Func(p0, p1, args, 29);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29 };
-- return Func(p0, p1, args, 30);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
-- const Arg& a30) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30 };
-- return Func(p0, p1, args, 31);
-- }
--
-- Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
-- const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
-- const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
-- const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
-- const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
-- const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
-- const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
-- const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
-- const Arg& a30, const Arg& a31) const {
-- const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
-- &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
-- &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30, &a31 };
-- return Func(p0, p1, args, 32);
-- }
--};
--
--} // namespace re2
--
--#endif // RE2_VARIADIC_FUNCTION_H_
-diff --git a/re2/re2/walker-inl.h b/re2/re2/walker-inl.h
-deleted file mode 100644
-index 4d2045f..0000000
---- a/re2/re2/walker-inl.h
-+++ /dev/null
-@@ -1,244 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Helper class for traversing Regexps without recursion.
--// Clients should declare their own subclasses that override
--// the PreVisit and PostVisit methods, which are called before
--// and after visiting the subexpressions.
--
--// Not quite the Visitor pattern, because (among other things)
--// the Visitor pattern is recursive.
--
--#ifndef RE2_WALKER_INL_H__
--#define RE2_WALKER_INL_H__
--
--#include "re2/regexp.h"
--
--namespace re2 {
--
--template<typename T> struct WalkState;
--
--template<typename T> class Regexp::Walker {
-- public:
-- Walker();
-- virtual ~Walker();
--
-- // Virtual method called before visiting re's children.
-- // PreVisit passes ownership of its return value to its caller.
-- // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
-- // and passed to the child PreVisits and PostVisits as parent_arg.
-- // At the top-most Regexp, parent_arg is arg passed to walk.
-- // If PreVisit sets *stop to true, the walk does not recurse
-- // into the children. Instead it behaves as though the return
-- // value from PreVisit is the return value from PostVisit.
-- // The default PreVisit returns parent_arg.
-- virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
--
-- // Virtual method called after visiting re's children.
-- // The pre_arg is the T that PreVisit returned.
-- // The child_args is a vector of the T that the child PostVisits returned.
-- // PostVisit takes ownership of pre_arg.
-- // PostVisit takes ownership of the Ts
-- // in *child_args, but not the vector itself.
-- // PostVisit passes ownership of its return value
-- // to its caller.
-- // The default PostVisit simply returns pre_arg.
-- virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
-- T* child_args, int nchild_args);
--
-- // Virtual method called to copy a T,
-- // when Walk notices that more than one child is the same re.
-- virtual T Copy(T arg);
--
-- // Virtual method called to do a "quick visit" of the re,
-- // but not its children. Only called once the visit budget
-- // has been used up and we're trying to abort the walk
-- // as quickly as possible. Should return a value that
-- // makes sense for the parent PostVisits still to be run.
-- // This function is (hopefully) only called by
-- // WalkExponential, but must be implemented by all clients,
-- // just in case.
-- virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
--
-- // Walks over a regular expression.
-- // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
-- // Returns the T returned by PostVisit on re.
-- T Walk(Regexp* re, T top_arg);
--
-- // Like Walk, but doesn't use Copy. This can lead to
-- // exponential runtimes on cross-linked Regexps like the
-- // ones generated by Simplify. To help limit this,
-- // at most max_visits nodes will be visited and then
-- // the walk will be cut off early.
-- // If the walk *is* cut off early, ShortVisit(re)
-- // will be called on regexps that cannot be fully
-- // visited rather than calling PreVisit/PostVisit.
-- T WalkExponential(Regexp* re, T top_arg, int max_visits);
--
-- // Clears the stack. Should never be necessary, since
-- // Walk always enters and exits with an empty stack.
-- // Logs DFATAL if stack is not already clear.
-- void Reset();
--
-- // Returns whether walk was cut off.
-- bool stopped_early() { return stopped_early_; }
--
-- private:
-- // Walk state for the entire traversal.
-- stack<WalkState<T> >* stack_;
-- bool stopped_early_;
-- int max_visits_;
--
-- T WalkInternal(Regexp* re, T top_arg, bool use_copy);
--
-- DISALLOW_EVIL_CONSTRUCTORS(Walker);
--};
--
--template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
-- T parent_arg,
-- bool* stop) {
-- return parent_arg;
--}
--
--template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
-- T parent_arg,
-- T pre_arg,
-- T* child_args,
-- int nchild_args) {
-- return pre_arg;
--}
--
--template<typename T> T Regexp::Walker<T>::Copy(T arg) {
-- return arg;
--}
--
--// State about a single level in the traversal.
--template<typename T> struct WalkState {
-- WalkState<T>(Regexp* re, T parent)
-- : re(re),
-- n(-1),
-- parent_arg(parent),
-- child_args(NULL) { }
--
-- Regexp* re; // The regexp
-- int n; // The index of the next child to process; -1 means need to PreVisit
-- T parent_arg; // Accumulated arguments.
-- T pre_arg;
-- T child_arg; // One-element buffer for child_args.
-- T* child_args;
--};
--
--template<typename T> Regexp::Walker<T>::Walker() {
-- stack_ = new stack<WalkState<T> >;
-- stopped_early_ = false;
--}
--
--template<typename T> Regexp::Walker<T>::~Walker() {
-- Reset();
-- delete stack_;
--}
--
--// Clears the stack. Should never be necessary, since
--// Walk always enters and exits with an empty stack.
--// Logs DFATAL if stack is not already clear.
--template<typename T> void Regexp::Walker<T>::Reset() {
-- if (stack_ && stack_->size() > 0) {
-- LOG(DFATAL) << "Stack not empty.";
-- while (stack_->size() > 0) {
-- delete stack_->top().child_args;
-- stack_->pop();
-- }
-- }
--}
--
--template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
-- bool use_copy) {
-- Reset();
--
-- if (re == NULL) {
-- LOG(DFATAL) << "Walk NULL";
-- return top_arg;
-- }
--
-- stack_->push(WalkState<T>(re, top_arg));
--
-- WalkState<T>* s;
-- for (;;) {
-- T t;
-- s = &stack_->top();
-- Regexp* re = s->re;
-- switch (s->n) {
-- case -1: {
-- if (--max_visits_ < 0) {
-- stopped_early_ = true;
-- t = ShortVisit(re, s->parent_arg);
-- break;
-- }
-- bool stop = false;
-- s->pre_arg = PreVisit(re, s->parent_arg, &stop);
-- if (stop) {
-- t = s->pre_arg;
-- break;
-- }
-- s->n = 0;
-- s->child_args = NULL;
-- if (re->nsub_ == 1)
-- s->child_args = &s->child_arg;
-- else if (re->nsub_ > 1)
-- s->child_args = new T[re->nsub_];
-- // Fall through.
-- }
-- default: {
-- if (re->nsub_ > 0) {
-- Regexp** sub = re->sub();
-- if (s->n < re->nsub_) {
-- if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
-- s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
-- s->n++;
-- } else {
-- stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
-- }
-- continue;
-- }
-- }
--
-- t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
-- if (re->nsub_ > 1)
-- delete[] s->child_args;
-- break;
-- }
-- }
--
-- // We've finished stack_->top().
-- // Update next guy down.
-- stack_->pop();
-- if (stack_->size() == 0)
-- return t;
-- s = &stack_->top();
-- if (s->child_args != NULL)
-- s->child_args[s->n] = t;
-- else
-- s->child_arg = t;
-- s->n++;
-- }
--}
--
--template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
-- // Without the exponential walking behavior,
-- // this budget should be more than enough for any
-- // regexp, and yet not enough to get us in trouble
-- // as far as CPU time.
-- max_visits_ = 1000000;
-- return WalkInternal(re, top_arg, true);
--}
--
--template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
-- int max_visits) {
-- max_visits_ = max_visits;
-- return WalkInternal(re, top_arg, false);
--}
--
--} // namespace re2
--
--#endif // RE2_WALKER_INL_H__
-diff --git a/re2/runtests b/re2/runtests
-deleted file mode 100755
-index aadcb92..0000000
---- a/re2/runtests
-+++ /dev/null
-@@ -1,21 +0,0 @@
--#!/usr/bin/env bash
--
--success=true
--for i
--do
-- printf "%-40s" $i
-- if sh -c "$i >$i.log 2>&1" 2>/dev/null
-- then
-- echo PASS
-- else
-- echo FAIL';' output in $i.log
-- success=false
-- fi
--done
--
--if $success; then
-- echo 'ALL TESTS PASSED.'
-- exit 0
--fi
--echo 'TESTS FAILED.'
--exit 1
-diff --git a/re2/testinstall.cc b/re2/testinstall.cc
-deleted file mode 100644
-index e369cea..0000000
---- a/re2/testinstall.cc
-+++ /dev/null
-@@ -1,13 +0,0 @@
--#include <re2/re2.h>
--#include <stdio.h>
--
--using namespace re2;
--
--int main(void) {
-- if(RE2::FullMatch("axbyc", "a.*b.*c")) {
-- printf("PASS\n");
-- return 0;
-- }
-- printf("FAIL\n");
-- return 2;
--}
-diff --git a/re2/util/arena.cc b/re2/util/arena.cc
-deleted file mode 100644
-index 25753c5..0000000
---- a/re2/util/arena.cc
-+++ /dev/null
-@@ -1,168 +0,0 @@
--// Copyright 2000 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--
--namespace re2 {
--
--// ----------------------------------------------------------------------
--// UnsafeArena::UnsafeArena()
--// UnsafeArena::~UnsafeArena()
--// Destroying the arena automatically calls Reset()
--// ----------------------------------------------------------------------
--
--
--UnsafeArena::UnsafeArena(const size_t block_size)
-- : block_size_(block_size),
-- freestart_(NULL), // set for real in Reset()
-- last_alloc_(NULL),
-- remaining_(0),
-- blocks_alloced_(1),
-- overflow_blocks_(NULL) {
-- assert(block_size > kDefaultAlignment);
--
-- first_blocks_[0].mem = reinterpret_cast<char*>(malloc(block_size_));
-- first_blocks_[0].size = block_size_;
--
-- Reset();
--}
--
--UnsafeArena::~UnsafeArena() {
-- FreeBlocks();
-- assert(overflow_blocks_ == NULL); // FreeBlocks() should do that
-- // The first X blocks stay allocated always by default. Delete them now.
-- for (int i = 0; i < blocks_alloced_; i++)
-- free(first_blocks_[i].mem);
--}
--
--// ----------------------------------------------------------------------
--// UnsafeArena::Reset()
--// Clears all the memory an arena is using.
--// ----------------------------------------------------------------------
--
--void UnsafeArena::Reset() {
-- FreeBlocks();
-- freestart_ = first_blocks_[0].mem;
-- remaining_ = first_blocks_[0].size;
-- last_alloc_ = NULL;
--
-- // We do not know for sure whether or not the first block is aligned,
-- // so we fix that right now.
-- const int overage = reinterpret_cast<uintptr_t>(freestart_) &
-- (kDefaultAlignment-1);
-- if (overage > 0) {
-- const int waste = kDefaultAlignment - overage;
-- freestart_ += waste;
-- remaining_ -= waste;
-- }
-- freestart_when_empty_ = freestart_;
-- assert(!(reinterpret_cast<uintptr_t>(freestart_)&(kDefaultAlignment-1)));
--}
--
--// -------------------------------------------------------------
--// UnsafeArena::AllocNewBlock()
--// Adds and returns an AllocatedBlock.
--// The returned AllocatedBlock* is valid until the next call
--// to AllocNewBlock or Reset. (i.e. anything that might
--// affect overflow_blocks_).
--// -------------------------------------------------------------
--
--UnsafeArena::AllocatedBlock* UnsafeArena::AllocNewBlock(const size_t block_size) {
-- AllocatedBlock *block;
-- // Find the next block.
-- if ( blocks_alloced_ < arraysize(first_blocks_) ) {
-- // Use one of the pre-allocated blocks
-- block = &first_blocks_[blocks_alloced_++];
-- } else { // oops, out of space, move to the vector
-- if (overflow_blocks_ == NULL) overflow_blocks_ = new vector<AllocatedBlock>;
-- // Adds another block to the vector.
-- overflow_blocks_->resize(overflow_blocks_->size()+1);
-- // block points to the last block of the vector.
-- block = &overflow_blocks_->back();
-- }
--
-- block->mem = reinterpret_cast<char*>(malloc(block_size));
-- block->size = block_size;
--
-- return block;
--}
--
--// ----------------------------------------------------------------------
--// UnsafeArena::GetMemoryFallback()
--// We take memory out of our pool, aligned on the byte boundary
--// requested. If we don't have space in our current pool, we
--// allocate a new block (wasting the remaining space in the
--// current block) and give you that. If your memory needs are
--// too big for a single block, we make a special your-memory-only
--// allocation -- this is equivalent to not using the arena at all.
--// ----------------------------------------------------------------------
--
--void* UnsafeArena::GetMemoryFallback(const size_t size, const int align) {
-- if (size == 0)
-- return NULL; // stl/stl_alloc.h says this is okay
--
-- assert(align > 0 && 0 == (align & (align - 1))); // must be power of 2
--
-- // If the object is more than a quarter of the block size, allocate
-- // it separately to avoid wasting too much space in leftover bytes
-- if (block_size_ == 0 || size > block_size_/4) {
-- // then it gets its own block in the arena
-- assert(align <= kDefaultAlignment); // because that's what new gives us
-- // This block stays separate from the rest of the world; in particular
-- // we don't update last_alloc_ so you can't reclaim space on this block.
-- return AllocNewBlock(size)->mem;
-- }
--
-- const int overage =
-- (reinterpret_cast<uintptr_t>(freestart_) & (align-1));
-- if (overage) {
-- const int waste = align - overage;
-- freestart_ += waste;
-- if (waste < remaining_) {
-- remaining_ -= waste;
-- } else {
-- remaining_ = 0;
-- }
-- }
-- if (size > remaining_) {
-- AllocatedBlock *block = AllocNewBlock(block_size_);
-- freestart_ = block->mem;
-- remaining_ = block->size;
-- }
-- remaining_ -= size;
-- last_alloc_ = freestart_;
-- freestart_ += size;
-- assert((reinterpret_cast<uintptr_t>(last_alloc_) & (align-1)) == 0);
-- return reinterpret_cast<void*>(last_alloc_);
--}
--
--// ----------------------------------------------------------------------
--// UnsafeArena::FreeBlocks()
--// Unlike GetMemory(), which does actual work, ReturnMemory() is a
--// no-op: we don't "free" memory until Reset() is called. We do
--// update some stats, though. Note we do no checking that the
--// pointer you pass in was actually allocated by us, or that it
--// was allocated for the size you say, so be careful here!
--// FreeBlocks() does the work for Reset(), actually freeing all
--// memory allocated in one fell swoop.
--// ----------------------------------------------------------------------
--
--void UnsafeArena::FreeBlocks() {
-- for ( int i = 1; i < blocks_alloced_; ++i ) { // keep first block alloced
-- free(first_blocks_[i].mem);
-- first_blocks_[i].mem = NULL;
-- first_blocks_[i].size = 0;
-- }
-- blocks_alloced_ = 1;
-- if (overflow_blocks_ != NULL) {
-- vector<AllocatedBlock>::iterator it;
-- for (it = overflow_blocks_->begin(); it != overflow_blocks_->end(); ++it) {
-- free(it->mem);
-- }
-- delete overflow_blocks_; // These should be used very rarely
-- overflow_blocks_ = NULL;
-- }
--}
--
--} // namespace re2
-diff --git a/re2/util/arena.h b/re2/util/arena.h
-deleted file mode 100644
-index 7eb385b..0000000
---- a/re2/util/arena.h
-+++ /dev/null
-@@ -1,103 +0,0 @@
--// Copyright 2000 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Sometimes it is necessary to allocate a large number of small
--// objects. Doing this the usual way (malloc, new) is slow,
--// especially for multithreaded programs. An UnsafeArena provides a
--// mark/release method of memory management: it asks for a large chunk
--// from the operating system and doles it out bit by bit as required.
--// Then you free all the memory at once by calling UnsafeArena::Reset().
--// The "Unsafe" refers to the fact that UnsafeArena is not safe to
--// call from multiple threads.
--//
--// The global operator new that can be used as follows:
--//
--// #include "lib/arena-inl.h"
--//
--// UnsafeArena arena(1000);
--// Foo* foo = new (AllocateInArena, &arena) Foo;
--//
--
--#ifndef RE2_UTIL_ARENA_H_
--#define RE2_UTIL_ARENA_H_
--
--namespace re2 {
--
--// This class is thread-compatible.
--class UnsafeArena {
-- public:
-- UnsafeArena(const size_t block_size);
-- virtual ~UnsafeArena();
--
-- void Reset();
--
-- // This should be the worst-case alignment for any type. This is
-- // good for IA-32, SPARC version 7 (the last one I know), and
-- // supposedly Alpha. i386 would be more time-efficient with a
-- // default alignment of 8, but ::operator new() uses alignment of 4,
-- // and an assertion will fail below after the call to MakeNewBlock()
-- // if you try to use a larger alignment.
--#ifdef __i386__
-- static const int kDefaultAlignment = 4;
--#else
-- static const int kDefaultAlignment = 8;
--#endif
--
-- private:
-- void* GetMemoryFallback(const size_t size, const int align);
--
-- public:
-- void* GetMemory(const size_t size, const int align) {
-- if ( size > 0 && size < remaining_ && align == 1 ) { // common case
-- last_alloc_ = freestart_;
-- freestart_ += size;
-- remaining_ -= size;
-- return reinterpret_cast<void*>(last_alloc_);
-- }
-- return GetMemoryFallback(size, align);
-- }
--
-- private:
-- struct AllocatedBlock {
-- char *mem;
-- size_t size;
-- };
--
-- // The returned AllocatedBlock* is valid until the next call to AllocNewBlock
-- // or Reset (i.e. anything that might affect overflow_blocks_).
-- AllocatedBlock *AllocNewBlock(const size_t block_size);
--
-- const AllocatedBlock *IndexToBlock(int index) const;
--
-- const size_t block_size_;
-- char* freestart_; // beginning of the free space in most recent block
-- char* freestart_when_empty_; // beginning of the free space when we're empty
-- char* last_alloc_; // used to make sure ReturnBytes() is safe
-- size_t remaining_;
-- // STL vector isn't as efficient as it could be, so we use an array at first
-- int blocks_alloced_; // how many of the first_blocks_ have been alloced
-- AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
-- // if the first_blocks_ aren't enough, expand into overflow_blocks_.
-- vector<AllocatedBlock>* overflow_blocks_;
--
-- void FreeBlocks(); // Frees all except first block
--
-- DISALLOW_EVIL_CONSTRUCTORS(UnsafeArena);
--};
--
--// Operators for allocation on the arena
--// Syntax: new (AllocateInArena, arena) MyClass;
--// STL containers, etc.
--enum AllocateInArenaType { AllocateInArena };
--
--} // namespace re2
--
--inline void* operator new(size_t size,
-- re2::AllocateInArenaType /* unused */,
-- re2::UnsafeArena *arena) {
-- return reinterpret_cast<char*>(arena->GetMemory(size, 1));
--}
--
--#endif // RE2_UTIL_ARENA_H_
--
-diff --git a/re2/util/atomicops.h b/re2/util/atomicops.h
-deleted file mode 100644
-index 11c1196..0000000
---- a/re2/util/atomicops.h
-+++ /dev/null
-@@ -1,79 +0,0 @@
--// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_UTIL_ATOMICOPS_H__
--#define RE2_UTIL_ATOMICOPS_H__
--
--#if defined(__i386__)
--
--static inline void WriteMemoryBarrier() {
-- int x;
-- __asm__ __volatile__("xchgl (%0),%0" // The lock prefix is implicit for xchg.
-- :: "r" (&x));
--}
--
--#elif defined(__x86_64__)
--
--// 64-bit implementations of memory barrier can be simpler, because
--// "sfence" is guaranteed to exist.
--static inline void WriteMemoryBarrier() {
-- __asm__ __volatile__("sfence" : : : "memory");
--}
--
--#elif defined(__ppc__)
--
--static inline void WriteMemoryBarrier() {
-- __asm__ __volatile__("eieio" : : : "memory");
--}
--
--#elif defined(__alpha__)
--
--static inline void WriteMemoryBarrier() {
-- __asm__ __volatile__("wmb" : : : "memory");
--}
--
--#else
--
--#include "util/mutex.h"
--
--static inline void WriteMemoryBarrier() {
-- // Slight overkill, but good enough:
-- // any mutex implementation must have
-- // a read barrier after the lock operation and
-- // a write barrier before the unlock operation.
-- //
-- // It may be worthwhile to write architecture-specific
-- // barriers for the common platforms, as above, but
-- // this is a correct fallback.
-- re2::Mutex mu;
-- re2::MutexLock l(&mu);
--}
--
--/*
--#error Need WriteMemoryBarrier for architecture.
--
--// Windows
--inline void WriteMemoryBarrier() {
-- LONG x;
-- ::InterlockedExchange(&x, 0);
--}
--*/
--
--#endif
--
--// Alpha has very weak memory ordering. If relying on WriteBarriers, must one
--// use read barriers for the readers too.
--#if defined(__alpha__)
--
--static inline void MaybeReadMemoryBarrier() {
-- __asm__ __volatile__("mb" : : : "memory");
--}
--
--#else
--
--static inline void MaybeReadMemoryBarrier() {}
--
--#endif // __alpha__
--
--#endif // RE2_UTIL_ATOMICOPS_H__
-diff --git a/re2/util/benchmark.cc b/re2/util/benchmark.cc
-deleted file mode 100644
-index c3aad7e..0000000
---- a/re2/util/benchmark.cc
-+++ /dev/null
-@@ -1,153 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--#include "util/flags.h"
--#include "util/benchmark.h"
--#include "re2/re2.h"
--
--DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
--
--using testing::Benchmark;
--using namespace re2;
--
--static Benchmark* benchmarks[10000];
--static int nbenchmarks;
--
--void Benchmark::Register() {
-- benchmarks[nbenchmarks] = this;
-- if(lo < 1)
-- lo = 1;
-- if(hi < lo)
-- hi = lo;
-- nbenchmarks++;
--}
--
--static int64 nsec() {
-- struct timeval tv;
-- if(gettimeofday(&tv, 0) < 0)
-- return -1;
-- return (int64)tv.tv_sec*1000*1000*1000 + tv.tv_usec*1000;
--}
--
--static int64 bytes;
--static int64 ns;
--static int64 t0;
--static int64 items;
--
--void SetBenchmarkBytesProcessed(long long x) {
-- bytes = x;
--}
--
--void StopBenchmarkTiming() {
-- if(t0 != 0)
-- ns += nsec() - t0;
-- t0 = 0;
--}
--
--void StartBenchmarkTiming() {
-- if(t0 == 0)
-- t0 = nsec();
--}
--
--void SetBenchmarkItemsProcessed(int n) {
-- items = n;
--}
--
--void BenchmarkMemoryUsage() {
-- // TODO(rsc): Implement.
--}
--
--int NumCPUs() {
-- return 1;
--}
--
--static void runN(Benchmark *b, int n, int siz) {
-- bytes = 0;
-- items = 0;
-- ns = 0;
-- t0 = nsec();
-- if(b->fn)
-- b->fn(n);
-- else if(b->fnr)
-- b->fnr(n, siz);
-- else {
-- fprintf(stderr, "%s: missing function\n", b->name);
-- exit(2);
-- }
-- if(t0 != 0)
-- ns += nsec() - t0;
--}
--
--static int round(int n) {
-- int base = 1;
--
-- while(base*10 < n)
-- base *= 10;
-- if(n < 2*base)
-- return 2*base;
-- if(n < 5*base)
-- return 5*base;
-- return 10*base;
--}
--
--void RunBench(Benchmark* b, int nthread, int siz) {
-- int n, last;
--
-- // TODO(rsc): Threaded benchmarks.
-- if(nthread != 1)
-- return;
--
-- // run once in case it's expensive
-- n = 1;
-- runN(b, n, siz);
-- while(ns < (int)1e9 && n < (int)1e9) {
-- last = n;
-- if(ns/n == 0)
-- n = 1e9;
-- else
-- n = 1e9 / (ns/n);
--
-- n = max(last+1, min(n+n/2, 100*last));
-- n = round(n);
-- runN(b, n, siz);
-- }
--
-- char mb[100];
-- char suf[100];
-- mb[0] = '\0';
-- suf[0] = '\0';
-- if(ns > 0 && bytes > 0)
-- snprintf(mb, sizeof mb, "\t%7.2f MB/s", ((double)bytes/1e6)/((double)ns/1e9));
-- if(b->fnr || b->lo != b->hi) {
-- if(siz >= (1<<20))
-- snprintf(suf, sizeof suf, "/%dM", siz/(1<<20));
-- else if(siz >= (1<<10))
-- snprintf(suf, sizeof suf, "/%dK", siz/(1<<10));
-- else
-- snprintf(suf, sizeof suf, "/%d", siz);
-- }
-- printf("%s%s\t%8lld\t%10lld ns/op%s\n", b->name, suf, (long long)n, (long long)ns/n, mb);
-- fflush(stdout);
--}
--
--static int match(const char* name, int argc, const char** argv) {
-- if(argc == 1)
-- return 1;
-- for(int i = 1; i < argc; i++)
-- if(RE2::PartialMatch(name, argv[i]))
-- return 1;
-- return 0;
--}
--
--int main(int argc, const char** argv) {
-- for(int i = 0; i < nbenchmarks; i++) {
-- Benchmark* b = benchmarks[i];
-- if(match(b->name, argc, argv))
-- for(int j = b->threadlo; j <= b->threadhi; j++)
-- for(int k = max(b->lo, 1); k <= max(b->hi, 1); k<<=1)
-- RunBench(b, j, k);
-- }
--}
--
-diff --git a/re2/util/benchmark.h b/re2/util/benchmark.h
-deleted file mode 100644
-index 31bbd53..0000000
---- a/re2/util/benchmark.h
-+++ /dev/null
-@@ -1,41 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_UTIL_BENCHMARK_H__
--#define RE2_UTIL_BENCHMARK_H__
--
--namespace testing {
--struct Benchmark {
-- const char* name;
-- void (*fn)(int);
-- void (*fnr)(int, int);
-- int lo;
-- int hi;
-- int threadlo;
-- int threadhi;
--
-- void Register();
-- Benchmark(const char* name, void (*f)(int)) { Clear(name); fn = f; Register(); }
-- Benchmark(const char* name, void (*f)(int, int), int l, int h) { Clear(name); fnr = f; lo = l; hi = h; Register(); }
-- void Clear(const char* n) { name = n; fn = 0; fnr = 0; lo = 0; hi = 0; threadlo = 0; threadhi = 0; }
-- Benchmark* ThreadRange(int lo, int hi) { threadlo = lo; threadhi = hi; return this; }
--};
--} // namespace testing
--
--void SetBenchmarkBytesProcessed(long long);
--void StopBenchmarkTiming();
--void StartBenchmarkTiming();
--void BenchmarkMemoryUsage();
--void SetBenchmarkItemsProcessed(int);
--
--int NumCPUs();
--
--#define BENCHMARK(f) \
-- ::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
--
--#define BENCHMARK_RANGE(f, lo, hi) \
-- ::testing::Benchmark* _benchmark_##f = \
-- (new ::testing::Benchmark(#f, f, lo, hi))
--
--#endif // RE2_UTIL_BENCHMARK_H__
-diff --git a/re2/util/flags.h b/re2/util/flags.h
-deleted file mode 100644
-index 77a06a2..0000000
---- a/re2/util/flags.h
-+++ /dev/null
-@@ -1,27 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Simplified version of Google's command line flags.
--// Does not support parsing the command line.
--// If you want to do that, see
--// http://code.google.com/p/google-gflags
--
--#ifndef RE2_UTIL_FLAGS_H__
--#define RE2_UTIL_FLAGS_H__
--
--#define DEFINE_flag(type, name, deflt, desc) \
-- namespace re2 { type FLAGS_##name = deflt; }
--
--#define DECLARE_flag(type, name) \
-- namespace re2 { extern type FLAGS_##name; }
--
--#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
--#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32, name, deflt, desc)
--#define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc)
--
--#define DECLARE_bool(name) DECLARE_flag(bool, name)
--#define DECLARE_int32(name) DECLARE_flag(int32, name)
--#define DECLARE_string(name) DECLARE_flag(string, name)
--
--#endif // RE2_UTIL_FLAGS_H__
-diff --git a/re2/util/hash.cc b/re2/util/hash.cc
-deleted file mode 100644
-index dfef7b7..0000000
---- a/re2/util/hash.cc
-+++ /dev/null
-@@ -1,231 +0,0 @@
--// Modified by Russ Cox to add "namespace re2".
--// Also threw away all but hashword and hashword2.
--// http://burtleburtle.net/bob/c/lookup3.c
--
--/*
---------------------------------------------------------------------------------
--lookup3.c, by Bob Jenkins, May 2006, Public Domain.
--
--These are functions for producing 32-bit hashes for hash table lookup.
--hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
--are externally useful functions. Routines to test the hash are included
--if SELF_TEST is defined. You can use this free for any purpose. It's in
--the public domain. It has no warranty.
--
--You probably want to use hashlittle(). hashlittle() and hashbig()
--hash byte arrays. hashlittle() is is faster than hashbig() on
--little-endian machines. Intel and AMD are little-endian machines.
--On second thought, you probably want hashlittle2(), which is identical to
--hashlittle() except it returns two 32-bit hashes for the price of one.
--You could implement hashbig2() if you wanted but I haven't bothered here.
--
--If you want to find a hash of, say, exactly 7 integers, do
-- a = i1; b = i2; c = i3;
-- mix(a,b,c);
-- a += i4; b += i5; c += i6;
-- mix(a,b,c);
-- a += i7;
-- final(a,b,c);
--then use c as the hash value. If you have a variable length array of
--4-byte integers to hash, use hashword(). If you have a byte array (like
--a character string), use hashlittle(). If you have several byte arrays, or
--a mix of things, see the comments above hashlittle().
--
--Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
--then mix those integers. This is fast (you can do a lot more thorough
--mixing with 12*3 instructions on 3 integers than you can with 3 instructions
--on 1 byte), but shoehorning those bytes into integers efficiently is messy.
---------------------------------------------------------------------------------
--*/
--
--#include "util/util.h"
--
--#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
--
--/*
---------------------------------------------------------------------------------
--mix -- mix 3 32-bit values reversibly.
--
--This is reversible, so any information in (a,b,c) before mix() is
--still in (a,b,c) after mix().
--
--If four pairs of (a,b,c) inputs are run through mix(), or through
--mix() in reverse, there are at least 32 bits of the output that
--are sometimes the same for one pair and different for another pair.
--This was tested for:
--* pairs that differed by one bit, by two bits, in any combination
-- of top bits of (a,b,c), or in any combination of bottom bits of
-- (a,b,c).
--* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
-- the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-- is commonly produced by subtraction) look like a single 1-bit
-- difference.
--* the base values were pseudorandom, all zero but one bit set, or
-- all zero plus a counter that starts at zero.
--
--Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
--satisfy this are
-- 4 6 8 16 19 4
-- 9 15 3 18 27 15
-- 14 9 3 7 17 3
--Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
--for "differ" defined as + with a one-bit base and a two-bit delta. I
--used http://burtleburtle.net/bob/hash/avalanche.html to choose
--the operations, constants, and arrangements of the variables.
--
--This does not achieve avalanche. There are input bits of (a,b,c)
--that fail to affect some output bits of (a,b,c), especially of a. The
--most thoroughly mixed value is c, but it doesn't really even achieve
--avalanche in c.
--
--This allows some parallelism. Read-after-writes are good at doubling
--the number of bits affected, so the goal of mixing pulls in the opposite
--direction as the goal of parallelism. I did what I could. Rotates
--seem to cost as much as shifts on every machine I could lay my hands
--on, and rotates are much kinder to the top and bottom bits, so I used
--rotates.
---------------------------------------------------------------------------------
--*/
--#define mix(a,b,c) \
--{ \
-- a -= c; a ^= rot(c, 4); c += b; \
-- b -= a; b ^= rot(a, 6); a += c; \
-- c -= b; c ^= rot(b, 8); b += a; \
-- a -= c; a ^= rot(c,16); c += b; \
-- b -= a; b ^= rot(a,19); a += c; \
-- c -= b; c ^= rot(b, 4); b += a; \
--}
--
--/*
---------------------------------------------------------------------------------
--final -- final mixing of 3 32-bit values (a,b,c) into c
--
--Pairs of (a,b,c) values differing in only a few bits will usually
--produce values of c that look totally different. This was tested for
--* pairs that differed by one bit, by two bits, in any combination
-- of top bits of (a,b,c), or in any combination of bottom bits of
-- (a,b,c).
--* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
-- the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-- is commonly produced by subtraction) look like a single 1-bit
-- difference.
--* the base values were pseudorandom, all zero but one bit set, or
-- all zero plus a counter that starts at zero.
--
--These constants passed:
-- 14 11 25 16 4 14 24
-- 12 14 25 16 4 14 24
--and these came close:
-- 4 8 15 26 3 22 24
-- 10 8 15 26 3 22 24
-- 11 8 15 26 3 22 24
---------------------------------------------------------------------------------
--*/
--#define final(a,b,c) \
--{ \
-- c ^= b; c -= rot(b,14); \
-- a ^= c; a -= rot(c,11); \
-- b ^= a; b -= rot(a,25); \
-- c ^= b; c -= rot(b,16); \
-- a ^= c; a -= rot(c,4); \
-- b ^= a; b -= rot(a,14); \
-- c ^= b; c -= rot(b,24); \
--}
--
--namespace re2 {
--
--/*
----------------------------------------------------------------------
-- This works on all machines. To be useful, it requires
-- -- that the key be an array of uint32_t's, and
-- -- that the length be the number of uint32_t's in the key
--
-- The function hashword() is identical to hashlittle() on little-endian
-- machines, and identical to hashbig() on big-endian machines,
-- except that the length has to be measured in uint32_ts rather than in
-- bytes. hashlittle() is more complicated than hashword() only because
-- hashlittle() has to dance around fitting the key bytes into registers.
----------------------------------------------------------------------
--*/
--uint32 hashword(
--const uint32 *k, /* the key, an array of uint32_t values */
--size_t length, /* the length of the key, in uint32_ts */
--uint32 initval) /* the previous hash, or an arbitrary value */
--{
-- uint32_t a,b,c;
--
-- /* Set up the internal state */
-- a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
--
-- /*------------------------------------------------- handle most of the key */
-- while (length > 3)
-- {
-- a += k[0];
-- b += k[1];
-- c += k[2];
-- mix(a,b,c);
-- length -= 3;
-- k += 3;
-- }
--
-- /*------------------------------------------- handle the last 3 uint32_t's */
-- switch(length) /* all the case statements fall through */
-- {
-- case 3 : c+=k[2];
-- case 2 : b+=k[1];
-- case 1 : a+=k[0];
-- final(a,b,c);
-- case 0: /* case 0: nothing left to add */
-- break;
-- }
-- /*------------------------------------------------------ report the result */
-- return c;
--}
--
--
--/*
----------------------------------------------------------------------
--hashword2() -- same as hashword(), but take two seeds and return two
--32-bit values. pc and pb must both be nonnull, and *pc and *pb must
--both be initialized with seeds. If you pass in (*pb)==0, the output
--(*pc) will be the same as the return value from hashword().
----------------------------------------------------------------------
--*/
--void hashword2 (
--const uint32 *k, /* the key, an array of uint32_t values */
--size_t length, /* the length of the key, in uint32_ts */
--uint32 *pc, /* IN: seed OUT: primary hash value */
--uint32 *pb) /* IN: more seed OUT: secondary hash value */
--{
-- uint32_t a,b,c;
--
-- /* Set up the internal state */
-- a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
-- c += *pb;
--
-- /*------------------------------------------------- handle most of the key */
-- while (length > 3)
-- {
-- a += k[0];
-- b += k[1];
-- c += k[2];
-- mix(a,b,c);
-- length -= 3;
-- k += 3;
-- }
--
-- /*------------------------------------------- handle the last 3 uint32_t's */
-- switch(length) /* all the case statements fall through */
-- {
-- case 3 : c+=k[2];
-- case 2 : b+=k[1];
-- case 1 : a+=k[0];
-- final(a,b,c);
-- case 0: /* case 0: nothing left to add */
-- break;
-- }
-- /*------------------------------------------------------ report the result */
-- *pc=c; *pb=b;
--}
--
--} // namespace re2
-diff --git a/re2/util/logging.h b/re2/util/logging.h
-deleted file mode 100644
-index c8f6604..0000000
---- a/re2/util/logging.h
-+++ /dev/null
-@@ -1,78 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Simplified version of Google's logging.
--
--#ifndef RE2_UTIL_LOGGING_H__
--#define RE2_UTIL_LOGGING_H__
--
--#include <unistd.h> /* for write */
--#include <sstream>
--
--// Debug-only checking.
--#define DCHECK(condition) assert(condition)
--#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
--#define DCHECK_NE(val1, val2) assert((val1) != (val2))
--#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
--#define DCHECK_LT(val1, val2) assert((val1) < (val2))
--#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
--#define DCHECK_GT(val1, val2) assert((val1) > (val2))
--
--// Always-on checking
--#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
--#define CHECK_LT(x, y) CHECK((x) < (y))
--#define CHECK_GT(x, y) CHECK((x) > (y))
--#define CHECK_LE(x, y) CHECK((x) <= (y))
--#define CHECK_GE(x, y) CHECK((x) >= (y))
--#define CHECK_EQ(x, y) CHECK((x) == (y))
--#define CHECK_NE(x, y) CHECK((x) != (y))
--
--#define LOG_INFO LogMessage(__FILE__, __LINE__)
--#define LOG_ERROR LOG_INFO
--#define LOG_WARNING LOG_INFO
--#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
--#define LOG_QFATAL LOG_FATAL
--
--#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
--
--#ifdef NDEBUG
--#define DEBUG_MODE 0
--#define LOG_DFATAL LOG_ERROR
--#else
--#define DEBUG_MODE 1
--#define LOG_DFATAL LOG_FATAL
--#endif
--
--#define LOG(severity) LOG_ ## severity.stream()
--
--class LogMessage {
-- public:
-- LogMessage(const char* file, int line) {
-- stream() << file << ":" << line << ": ";
-- }
-- ~LogMessage() {
-- stream() << "\n";
-- string s = str_.str();
-- if(write(2, s.data(), s.size()) < 0) {} // shut up gcc
-- }
-- ostream& stream() { return str_; }
--
-- private:
-- std::ostringstream str_;
-- DISALLOW_EVIL_CONSTRUCTORS(LogMessage);
--};
--
--class LogMessageFatal : public LogMessage {
-- public:
-- LogMessageFatal(const char* file, int line)
-- : LogMessage(file, line) { }
-- ~LogMessageFatal() {
-- std::cerr << "\n";
-- abort();
-- }
-- private:
-- DISALLOW_EVIL_CONSTRUCTORS(LogMessageFatal);
--};
--
--#endif // RE2_UTIL_LOGGING_H__
-diff --git a/re2/util/mutex.h b/re2/util/mutex.h
-deleted file mode 100644
-index d2f69e7..0000000
---- a/re2/util/mutex.h
-+++ /dev/null
-@@ -1,190 +0,0 @@
--// Copyright 2007 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--/*
-- * A simple mutex wrapper, supporting locks and read-write locks.
-- * You should assume the locks are *not* re-entrant.
-- */
--
--#ifndef RE2_UTIL_MUTEX_H_
--#define RE2_UTIL_MUTEX_H_
--
--namespace re2 {
--
--#define HAVE_PTHREAD 1
--#define HAVE_RWLOCK 1
--
--#if defined(NO_THREADS)
-- typedef int MutexType; // to keep a lock-count
--#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
-- // Needed for pthread_rwlock_*. If it causes problems, you could take it
-- // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
-- // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
-- // for locking there.)
--# ifdef __linux__
--# undef _XOPEN_SOURCE
--# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls
--# endif
--# include <pthread.h>
-- typedef pthread_rwlock_t MutexType;
--#elif defined(HAVE_PTHREAD)
--# include <pthread.h>
-- typedef pthread_mutex_t MutexType;
--#elif defined(WIN32)
--# define WIN32_LEAN_AND_MEAN // We only need minimal includes
--# ifdef GMUTEX_TRYLOCK
-- // We need Windows NT or later for TryEnterCriticalSection(). If you
-- // don't need that functionality, you can remove these _WIN32_WINNT
-- // lines, and change TryLock() to assert(0) or something.
--# ifndef _WIN32_WINNT
--# define _WIN32_WINNT 0x0400
--# endif
--# endif
--# include <windows.h>
-- typedef CRITICAL_SECTION MutexType;
--#else
--# error Need to implement mutex.h for your architecture, or #define NO_THREADS
--#endif
--
--class Mutex {
-- public:
-- // Create a Mutex that is not held by anybody.
-- inline Mutex();
--
-- // Destructor
-- inline ~Mutex();
--
-- inline void Lock(); // Block if needed until free then acquire exclusively
-- inline void Unlock(); // Release a lock acquired via Lock()
-- inline bool TryLock(); // If free, Lock() and return true, else return false
-- // Note that on systems that don't support read-write locks, these may
-- // be implemented as synonyms to Lock() and Unlock(). So you can use
-- // these for efficiency, but don't use them anyplace where being able
-- // to do shared reads is necessary to avoid deadlock.
-- inline void ReaderLock(); // Block until free or shared then acquire a share
-- inline void ReaderUnlock(); // Release a read share of this Mutex
-- inline void WriterLock() { Lock(); } // Acquire an exclusive lock
-- inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
-- inline void AssertHeld() { }
--
-- private:
-- MutexType mutex_;
--
-- // Catch the error of writing Mutex when intending MutexLock.
-- Mutex(Mutex *ignored) {}
-- // Disallow "evil" constructors
-- Mutex(const Mutex&);
-- void operator=(const Mutex&);
--};
--
--// Now the implementation of Mutex for various systems
--#if defined(NO_THREADS)
--
--// When we don't have threads, we can be either reading or writing,
--// but not both. We can have lots of readers at once (in no-threads
--// mode, that's most likely to happen in recursive function calls),
--// but only one writer. We represent this by having mutex_ be -1 when
--// writing and a number > 0 when reading (and 0 when no lock is held).
--//
--// In debug mode, we assert these invariants, while in non-debug mode
--// we do nothing, for efficiency. That's why everything is in an
--// assert.
--#include <assert.h>
--
--Mutex::Mutex() : mutex_(0) { }
--Mutex::~Mutex() { assert(mutex_ == 0); }
--void Mutex::Lock() { assert(--mutex_ == -1); }
--void Mutex::Unlock() { assert(mutex_++ == -1); }
--bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; }
--void Mutex::ReaderLock() { assert(++mutex_ > 0); }
--void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
--
--#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
--
--#include <stdlib.h> // for abort()
--#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0)
--
--Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
--Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
--void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
--void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
--bool Mutex::TryLock() { return pthread_rwlock_trywrlock(&mutex_) == 0; }
--void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
--void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
--
--#undef SAFE_PTHREAD
--
--#elif defined(HAVE_PTHREAD)
--
--#include <stdlib.h> // for abort()
--#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0)
--
--Mutex::Mutex() { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); }
--Mutex::~Mutex() { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); }
--void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); }
--void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); }
--bool Mutex::TryLock() { return pthread_mutex_trylock(&mutex_) == 0; }
--void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks
--void Mutex::ReaderUnlock() { Unlock(); }
--#undef SAFE_PTHREAD
--
--#elif defined(WIN32)
--
--Mutex::Mutex() { InitializeCriticalSection(&mutex_); }
--Mutex::~Mutex() { DeleteCriticalSection(&mutex_); }
--void Mutex::Lock() { EnterCriticalSection(&mutex_); }
--void Mutex::Unlock() { LeaveCriticalSection(&mutex_); }
--bool Mutex::TryLock() { return TryEnterCriticalSection(&mutex_) != 0; }
--void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks
--void Mutex::ReaderUnlock() { Unlock(); }
--
--#endif
--
--
--// --------------------------------------------------------------------------
--// Some helper classes
--
--// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
--class MutexLock {
-- public:
-- explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
-- ~MutexLock() { mu_->Unlock(); }
-- private:
-- Mutex * const mu_;
-- // Disallow "evil" constructors
-- MutexLock(const MutexLock&);
-- void operator=(const MutexLock&);
--};
--
--// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
--class ReaderMutexLock {
-- public:
-- explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
-- ~ReaderMutexLock() { mu_->ReaderUnlock(); }
-- private:
-- Mutex * const mu_;
-- // Disallow "evil" constructors
-- ReaderMutexLock(const ReaderMutexLock&);
-- void operator=(const ReaderMutexLock&);
--};
--
--class WriterMutexLock {
-- public:
-- explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
-- ~WriterMutexLock() { mu_->WriterUnlock(); }
-- private:
-- Mutex * const mu_;
-- // Disallow "evil" constructors
-- WriterMutexLock(const WriterMutexLock&);
-- void operator=(const WriterMutexLock&);
--};
--
--// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
--#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
--#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
--#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
--
--} // namespace re2
--
--#endif /* #define RE2_UTIL_MUTEX_H_ */
-diff --git a/re2/util/pcre.cc b/re2/util/pcre.cc
-deleted file mode 100644
-index 5e67e1f..0000000
---- a/re2/util/pcre.cc
-+++ /dev/null
-@@ -1,961 +0,0 @@
--// Copyright 2003-2009 Google Inc. All rights reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
--// The main changes are the addition of the HitLimit method and
--// compilation as PCRE in namespace re2.
--
--#include <errno.h>
--#include "util/util.h"
--#include "util/flags.h"
--#include "util/pcre.h"
--
--#define PCREPORT(level) LOG(level)
--
--// Default PCRE limits.
--// Defaults chosen to allow a plausible amount of CPU and
--// not exceed main thread stacks. Note that other threads
--// often have smaller stacks, and therefore tightening
--// regexp_stack_limit may frequently be necessary.
--DEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)");
--DEFINE_int32(regexp_match_limit, 1000000,
-- "default PCRE match limit (function calls)");
--
--namespace re2 {
--
--// Maximum number of args we can set
--static const int kMaxArgs = 16;
--static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
--
--// Approximate size of a recursive invocation of PCRE's
--// internal "match()" frame. This varies depending on the
--// compiler and architecture, of course, so the constant is
--// just a conservative estimate. To find the exact number,
--// run regexp_unittest with --regexp_stack_limit=0 under
--// a debugger and look at the frames when it crashes.
--// The exact frame size was 656 in production on 2008/02/03.
--static const int kPCREFrameSize = 700;
--
--// Special name for missing C++ arguments.
--PCRE::Arg PCRE::no_more_args((void*)NULL);
--
--const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
--const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
--const PCRE::ConsumeFunctor PCRE::Consume = { };
--const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
--
--// If a regular expression has no error, its error_ field points here
--static const string empty_string;
--
--void PCRE::Init(const char* pattern, Option options, int match_limit,
-- int stack_limit, bool report_errors) {
-- pattern_ = pattern;
-- options_ = options;
-- match_limit_ = match_limit;
-- stack_limit_ = stack_limit;
-- hit_limit_ = false;
-- error_ = &empty_string;
-- report_errors_ = report_errors;
-- re_full_ = NULL;
-- re_partial_ = NULL;
--
-- if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
-- error_ = new string("illegal regexp option");
-- PCREPORT(ERROR)
-- << "Error compiling '" << pattern << "': illegal regexp option";
-- } else {
-- re_partial_ = Compile(UNANCHORED);
-- if (re_partial_ != NULL) {
-- re_full_ = Compile(ANCHOR_BOTH);
-- }
-- }
--}
--
--PCRE::PCRE(const char* pattern) {
-- Init(pattern, None, 0, 0, true);
--}
--PCRE::PCRE(const char* pattern, Option option) {
-- Init(pattern, option, 0, 0, true);
--}
--PCRE::PCRE(const string& pattern) {
-- Init(pattern.c_str(), None, 0, 0, true);
--}
--PCRE::PCRE(const string& pattern, Option option) {
-- Init(pattern.c_str(), option, 0, 0, true);
--}
--PCRE::PCRE(const string& pattern, const PCRE_Options& re_option) {
-- Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
-- re_option.stack_limit(), re_option.report_errors());
--}
--
--PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
-- Init(pattern, re_option.option(), re_option.match_limit(),
-- re_option.stack_limit(), re_option.report_errors());
--}
--
--PCRE::~PCRE() {
-- if (re_full_ != NULL) pcre_free(re_full_);
-- if (re_partial_ != NULL) pcre_free(re_partial_);
-- if (error_ != &empty_string) delete error_;
--}
--
--pcre* PCRE::Compile(Anchor anchor) {
-- // Special treatment for anchoring. This is needed because at
-- // runtime pcre only provides an option for anchoring at the
-- // beginning of a string.
-- //
-- // There are three types of anchoring we want:
-- // UNANCHORED Compile the original pattern, and use
-- // a pcre unanchored match.
-- // ANCHOR_START Compile the original pattern, and use
-- // a pcre anchored match.
-- // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
-- // and use a pcre anchored match.
--
-- const char* error;
-- int eoffset;
-- pcre* re;
-- if (anchor != ANCHOR_BOTH) {
-- re = pcre_compile(pattern_.c_str(),
-- (options_ & EnabledCompileOptions),
-- &error, &eoffset, NULL);
-- } else {
-- // Tack a '\z' at the end of PCRE. Parenthesize it first so that
-- // the '\z' applies to all top-level alternatives in the regexp.
-- string wrapped = "(?:"; // A non-counting grouping operator
-- wrapped += pattern_;
-- wrapped += ")\\z";
-- re = pcre_compile(wrapped.c_str(),
-- (options_ & EnabledCompileOptions),
-- &error, &eoffset, NULL);
-- }
-- if (re == NULL) {
-- if (error_ == &empty_string) error_ = new string(error);
-- PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
-- }
-- return re;
--}
--
--/***** Convenience interfaces *****/
--
--bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
-- const PCRE& re,
-- const Arg& a0,
-- const Arg& a1,
-- const Arg& a2,
-- const Arg& a3,
-- const Arg& a4,
-- const Arg& a5,
-- const Arg& a6,
-- const Arg& a7,
-- const Arg& a8,
-- const Arg& a9,
-- const Arg& a10,
-- const Arg& a11,
-- const Arg& a12,
-- const Arg& a13,
-- const Arg& a14,
-- const Arg& a15) const {
-- const Arg* args[kMaxArgs];
-- int n = 0;
-- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
-- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
-- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
-- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
-- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
-- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
-- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
-- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
-- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
-- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
-- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
-- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
-- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
-- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
-- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
-- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
--done:
--
-- int consumed;
-- int vec[kVecSize];
-- return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
--}
--
--bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
-- const PCRE& re,
-- const Arg& a0,
-- const Arg& a1,
-- const Arg& a2,
-- const Arg& a3,
-- const Arg& a4,
-- const Arg& a5,
-- const Arg& a6,
-- const Arg& a7,
-- const Arg& a8,
-- const Arg& a9,
-- const Arg& a10,
-- const Arg& a11,
-- const Arg& a12,
-- const Arg& a13,
-- const Arg& a14,
-- const Arg& a15) const {
-- const Arg* args[kMaxArgs];
-- int n = 0;
-- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
-- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
-- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
-- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
-- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
-- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
-- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
-- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
-- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
-- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
-- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
-- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
-- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
-- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
-- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
-- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
--done:
--
-- int consumed;
-- int vec[kVecSize];
-- return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
--}
--
--bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
-- const PCRE& pattern,
-- const Arg& a0,
-- const Arg& a1,
-- const Arg& a2,
-- const Arg& a3,
-- const Arg& a4,
-- const Arg& a5,
-- const Arg& a6,
-- const Arg& a7,
-- const Arg& a8,
-- const Arg& a9,
-- const Arg& a10,
-- const Arg& a11,
-- const Arg& a12,
-- const Arg& a13,
-- const Arg& a14,
-- const Arg& a15) const {
-- const Arg* args[kMaxArgs];
-- int n = 0;
-- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
-- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
-- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
-- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
-- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
-- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
-- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
-- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
-- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
-- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
-- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
-- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
-- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
-- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
-- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
-- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
--done:
--
-- int consumed;
-- int vec[kVecSize];
-- if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
-- args, n, vec, kVecSize)) {
-- input->remove_prefix(consumed);
-- return true;
-- } else {
-- return false;
-- }
--}
--
--bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
-- const PCRE& pattern,
-- const Arg& a0,
-- const Arg& a1,
-- const Arg& a2,
-- const Arg& a3,
-- const Arg& a4,
-- const Arg& a5,
-- const Arg& a6,
-- const Arg& a7,
-- const Arg& a8,
-- const Arg& a9,
-- const Arg& a10,
-- const Arg& a11,
-- const Arg& a12,
-- const Arg& a13,
-- const Arg& a14,
-- const Arg& a15) const {
-- const Arg* args[kMaxArgs];
-- int n = 0;
-- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
-- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
-- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
-- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
-- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
-- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
-- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
-- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
-- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
-- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
-- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
-- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
-- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
-- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
-- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
-- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
--done:
--
-- int consumed;
-- int vec[kVecSize];
-- if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
-- args, n, vec, kVecSize)) {
-- input->remove_prefix(consumed);
-- return true;
-- } else {
-- return false;
-- }
--}
--
--bool PCRE::Replace(string *str,
-- const PCRE& pattern,
-- const StringPiece& rewrite) {
-- int vec[kVecSize];
-- int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
-- if (matches == 0)
-- return false;
--
-- string s;
-- if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
-- return false;
--
-- assert(vec[0] >= 0);
-- assert(vec[1] >= 0);
-- str->replace(vec[0], vec[1] - vec[0], s);
-- return true;
--}
--
--int PCRE::GlobalReplace(string *str,
-- const PCRE& pattern,
-- const StringPiece& rewrite) {
-- int count = 0;
-- int vec[kVecSize];
-- string out;
-- int start = 0;
-- bool last_match_was_empty_string = false;
--
-- for (; start <= str->length();) {
-- // If the previous match was for the empty string, we shouldn't
-- // just match again: we'll match in the same way and get an
-- // infinite loop. Instead, we do the match in a special way:
-- // anchored -- to force another try at the same position --
-- // and with a flag saying that this time, ignore empty matches.
-- // If this special match returns, that means there's a non-empty
-- // match at this position as well, and we can continue. If not,
-- // we do what perl does, and just advance by one.
-- // Notice that perl prints '@@@' for this;
-- // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
-- int matches;
-- if (last_match_was_empty_string) {
-- matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
-- vec, kVecSize);
-- if (matches <= 0) {
-- if (start < str->length())
-- out.push_back((*str)[start]);
-- start++;
-- last_match_was_empty_string = false;
-- continue;
-- }
-- } else {
-- matches = pattern.TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
-- if (matches <= 0)
-- break;
-- }
-- int matchstart = vec[0], matchend = vec[1];
-- assert(matchstart >= start);
-- assert(matchend >= matchstart);
--
-- out.append(*str, start, matchstart - start);
-- pattern.Rewrite(&out, rewrite, *str, vec, matches);
-- start = matchend;
-- count++;
-- last_match_was_empty_string = (matchstart == matchend);
-- }
--
-- if (count == 0)
-- return 0;
--
-- if (start < str->length())
-- out.append(*str, start, str->length() - start);
-- swap(out, *str);
-- return count;
--}
--
--bool PCRE::Extract(const StringPiece &text,
-- const PCRE& pattern,
-- const StringPiece &rewrite,
-- string *out) {
-- int vec[kVecSize];
-- int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
-- if (matches == 0)
-- return false;
-- out->clear();
-- return pattern.Rewrite(out, rewrite, text, vec, matches);
--}
--
--string PCRE::QuoteMeta(const StringPiece& unquoted) {
-- string result;
-- result.reserve(unquoted.size() << 1);
--
-- // Escape any ascii character not in [A-Za-z_0-9].
-- //
-- // Note that it's legal to escape a character even if it has no
-- // special meaning in a regular expression -- so this function does
-- // that. (This also makes it identical to the perl function of the
-- // same name except for the null-character special case;
-- // see `perldoc -f quotemeta`.)
-- for (int ii = 0; ii < unquoted.length(); ++ii) {
-- // Note that using 'isalnum' here raises the benchmark time from
-- // 32ns to 58ns:
-- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
-- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
-- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
-- unquoted[ii] != '_' &&
-- // If this is the part of a UTF8 or Latin1 character, we need
-- // to copy this byte without escaping. Experimentally this is
-- // what works correctly with the regexp library.
-- !(unquoted[ii] & 128)) {
-- if (unquoted[ii] == '\0') { // Special handling for null chars.
-- // Can't use "\\0" since the next character might be a digit.
-- result += "\\x00";
-- continue;
-- }
-- result += '\\';
-- }
-- result += unquoted[ii];
-- }
--
-- return result;
--}
--
--/***** Actual matching and rewriting code *****/
--
--bool PCRE::HitLimit() {
-- return hit_limit_;
--}
--
--void PCRE::ClearHitLimit() {
-- hit_limit_ = 0;
--}
--
--int PCRE::TryMatch(const StringPiece& text,
-- int startpos,
-- Anchor anchor,
-- bool empty_ok,
-- int *vec,
-- int vecsize) const {
-- pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
-- if (re == NULL) {
-- PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
-- return 0;
-- }
--
-- int match_limit = match_limit_;
-- if (match_limit <= 0) {
-- match_limit = FLAGS_regexp_match_limit;
-- }
--
-- int stack_limit = stack_limit_;
-- if (stack_limit <= 0) {
-- stack_limit = FLAGS_regexp_stack_limit;
-- }
--
-- pcre_extra extra = { 0 };
-- if (match_limit > 0) {
-- extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
-- extra.match_limit = match_limit;
-- }
-- if (stack_limit > 0) {
-- extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-- extra.match_limit_recursion = stack_limit / kPCREFrameSize;
-- }
--
-- int options = 0;
-- if (anchor != UNANCHORED)
-- options |= PCRE_ANCHORED;
-- if (!empty_ok)
-- options |= PCRE_NOTEMPTY;
--
-- int rc = pcre_exec(re, // The regular expression object
-- &extra,
-- (text.data() == NULL) ? "" : text.data(),
-- text.size(),
-- startpos,
-- options,
-- vec,
-- vecsize);
--
-- // Handle errors
-- if (rc == 0) {
-- // pcre_exec() returns 0 as a special case when the number of
-- // capturing subpatterns exceeds the size of the vector.
-- // When this happens, there is a match and the output vector
-- // is filled, but we miss out on the positions of the extra subpatterns.
-- rc = vecsize / 2;
-- } else if (rc < 0) {
-- switch (rc) {
-- case PCRE_ERROR_NOMATCH:
-- return 0;
-- case PCRE_ERROR_MATCHLIMIT:
-- // Writing to hit_limit is not safe if multiple threads
-- // are using the PCRE, but the flag is only intended
-- // for use by unit tests anyway, so we let it go.
-- hit_limit_ = true;
-- PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
-- << " when matching '" << pattern_ << "'"
-- << " against text that is " << text.size() << " bytes.";
-- return 0;
-- case PCRE_ERROR_RECURSIONLIMIT:
-- // See comment about hit_limit above.
-- hit_limit_ = true;
-- PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
-- << " when matching '" << pattern_ << "'"
-- << " against text that is " << text.size() << " bytes.";
-- return 0;
-- default:
-- // There are other return codes from pcre.h :
-- // PCRE_ERROR_NULL (-2)
-- // PCRE_ERROR_BADOPTION (-3)
-- // PCRE_ERROR_BADMAGIC (-4)
-- // PCRE_ERROR_UNKNOWN_NODE (-5)
-- // PCRE_ERROR_NOMEMORY (-6)
-- // PCRE_ERROR_NOSUBSTRING (-7)
-- // ...
-- PCREPORT(ERROR) << "Unexpected return code: " << rc
-- << " when matching '" << pattern_ << "'"
-- << ", re=" << re
-- << ", text=" << text
-- << ", vec=" << vec
-- << ", vecsize=" << vecsize;
-- return 0;
-- }
-- }
--
-- return rc;
--}
--
--bool PCRE::DoMatchImpl(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const* args,
-- int n,
-- int* vec,
-- int vecsize) const {
-- assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
-- int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
-- assert(matches >= 0); // TryMatch never returns negatives
-- if (matches == 0)
-- return false;
--
-- *consumed = vec[1];
--
-- if (n == 0 || args == NULL) {
-- // We are not interested in results
-- return true;
-- }
-- if (NumberOfCapturingGroups() < n) {
-- // PCRE has fewer capturing groups than number of arg pointers passed in
-- return false;
-- }
--
-- // If we got here, we must have matched the whole pattern.
-- // We do not need (can not do) any more checks on the value of 'matches' here
-- // -- see the comment for TryMatch.
-- for (int i = 0; i < n; i++) {
-- const int start = vec[2*(i+1)];
-- const int limit = vec[2*(i+1)+1];
-- if (!args[i]->Parse(text.data() + start, limit-start)) {
-- // TODO: Should we indicate what the error was?
-- return false;
-- }
-- }
--
-- return true;
--}
--
--bool PCRE::DoMatch(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const args[],
-- int n) const {
-- assert(n >= 0);
-- size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
-- // (as for kVecSize)
-- int *vec = new int[vecsize];
-- bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
-- delete[] vec;
-- return b;
--}
--
--bool PCRE::Rewrite(string *out, const StringPiece &rewrite,
-- const StringPiece &text, int *vec, int veclen) const {
-- int number_of_capturing_groups = NumberOfCapturingGroups();
-- for (const char *s = rewrite.data(), *end = s + rewrite.size();
-- s < end; s++) {
-- int c = *s;
-- if (c == '\\') {
-- c = *++s;
-- if (isdigit(c)) {
-- int n = (c - '0');
-- if (n >= veclen) {
-- if (n <= number_of_capturing_groups) {
-- // unmatched optional capturing group. treat
-- // its value as empty string; i.e., nothing to append.
-- } else {
-- PCREPORT(ERROR) << "requested group " << n
-- << " in regexp " << rewrite.data();
-- return false;
-- }
-- }
-- int start = vec[2 * n];
-- if (start >= 0)
-- out->append(text.data() + start, vec[2 * n + 1] - start);
-- } else if (c == '\\') {
-- out->push_back('\\');
-- } else {
-- PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
-- return false;
-- }
-- } else {
-- out->push_back(c);
-- }
-- }
-- return true;
--}
--
--bool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const {
-- int max_token = -1;
-- for (const char *s = rewrite.data(), *end = s + rewrite.size();
-- s < end; s++) {
-- int c = *s;
-- if (c != '\\') {
-- continue;
-- }
-- if (++s == end) {
-- *error = "Rewrite schema error: '\\' not allowed at end.";
-- return false;
-- }
-- c = *s;
-- if (c == '\\') {
-- continue;
-- }
-- if (!isdigit(c)) {
-- *error = "Rewrite schema error: "
-- "'\\' must be followed by a digit or '\\'.";
-- return false;
-- }
-- int n = (c - '0');
-- if (max_token < n) {
-- max_token = n;
-- }
-- }
--
-- if (max_token > NumberOfCapturingGroups()) {
-- SStringPrintf(error, "Rewrite schema requests %d matches, "
-- "but the regexp only has %d parenthesized subexpressions.",
-- max_token, NumberOfCapturingGroups());
-- return false;
-- }
-- return true;
--}
--
--
--// Return the number of capturing subpatterns, or -1 if the
--// regexp wasn't valid on construction.
--int PCRE::NumberOfCapturingGroups() const {
-- if (re_partial_ == NULL) return -1;
--
-- int result;
-- CHECK(pcre_fullinfo(re_partial_, // The regular expression object
-- NULL, // We did not study the pattern
-- PCRE_INFO_CAPTURECOUNT,
-- &result) == 0);
-- return result;
--}
--
--
--/***** Parsers for various types *****/
--
--bool PCRE::Arg::parse_null(const char* str, int n, void* dest) {
-- // We fail if somebody asked us to store into a non-NULL void* pointer
-- return (dest == NULL);
--}
--
--bool PCRE::Arg::parse_string(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- reinterpret_cast<string*>(dest)->assign(str, n);
-- return true;
--}
--
--bool PCRE::Arg::parse_stringpiece(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- reinterpret_cast<StringPiece*>(dest)->set(str, n);
-- return true;
--}
--
--bool PCRE::Arg::parse_char(const char* str, int n, void* dest) {
-- if (n != 1) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<char*>(dest)) = str[0];
-- return true;
--}
--
--bool PCRE::Arg::parse_uchar(const char* str, int n, void* dest) {
-- if (n != 1) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned char*>(dest)) = str[0];
-- return true;
--}
--
--// Largest number spec that we are willing to parse
--static const int kMaxNumberLength = 32;
--
--// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1
--// PCREQUIPCRES "n > 0"
--// Copies "str" into "buf" and null-terminates if necessary.
--// Returns one of:
--// a. "str" if no termination is needed
--// b. "buf" if the string was copied and null-terminated
--// c. "" if the input was invalid and has no hope of being parsed
--static const char* TerminateNumber(char* buf, const char* str, int n) {
-- if ((n > 0) && isspace(*str)) {
-- // We are less forgiving than the strtoxxx() routines and do not
-- // allow leading spaces.
-- return "";
-- }
--
-- // See if the character right after the input text may potentially
-- // look like a digit.
-- if (isdigit(str[n]) ||
-- ((str[n] >= 'a') && (str[n] <= 'f')) ||
-- ((str[n] >= 'A') && (str[n] <= 'F'))) {
-- if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
-- memcpy(buf, str, n);
-- buf[n] = '\0';
-- return buf;
-- } else {
-- // We can parse right out of the supplied string, so return it.
-- return str;
-- }
--}
--
--bool PCRE::Arg::parse_long_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, n);
-- char* end;
-- errno = 0;
-- long r = strtol(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<long*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_ulong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, n);
-- if (str[0] == '-') {
-- // strtoul() will silently accept negative numbers and parse
-- // them. This module is more strict and treats them as errors.
-- return false;
-- }
--
-- char* end;
-- errno = 0;
-- unsigned long r = strtoul(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned long*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_short_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- long r;
-- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((short)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<short*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_ushort_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- unsigned long r;
-- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((ushort)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned short*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_int_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- long r;
-- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((int)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<int*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_uint_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- unsigned long r;
-- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
-- if ((uint)r != r) return false; // Out of range
-- if (dest == NULL) return true;
-- *(reinterpret_cast<unsigned int*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_longlong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, n);
-- char* end;
-- errno = 0;
-- int64 r = strtoll(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<int64*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_ulonglong_radix(const char* str,
-- int n,
-- void* dest,
-- int radix) {
-- if (n == 0) return false;
-- char buf[kMaxNumberLength+1];
-- str = TerminateNumber(buf, str, n);
-- if (str[0] == '-') {
-- // strtoull() will silently accept negative numbers and parse
-- // them. This module is more strict and treats them as errors.
-- return false;
-- }
-- char* end;
-- errno = 0;
-- uint64 r = strtoull(str, &end, radix);
-- if (end != str + n) return false; // Leftover junk
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<uint64*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_double(const char* str, int n, void* dest) {
-- if (n == 0) return false;
-- static const int kMaxLength = 200;
-- char buf[kMaxLength];
-- if (n >= kMaxLength) return false;
-- memcpy(buf, str, n);
-- buf[n] = '\0';
-- errno = 0;
-- char* end;
-- double r = strtod(buf, &end);
-- if (end != buf + n) {
--#ifdef COMPILER_MSVC
-- // Microsoft's strtod() doesn't handle inf and nan, so we have to
-- // handle it explicitly. Speed is not important here because this
-- // code is only called in unit tests.
-- bool pos = true;
-- const char* i = buf;
-- if ('-' == *i) {
-- pos = false;
-- ++i;
-- } else if ('+' == *i) {
-- ++i;
-- }
-- if (0 == stricmp(i, "inf") || 0 == stricmp(i, "infinity")) {
-- r = numeric_limits<double>::infinity();
-- if (!pos)
-- r = -r;
-- } else if (0 == stricmp(i, "nan")) {
-- r = numeric_limits<double>::quiet_NaN();
-- } else {
-- return false;
-- }
--#else
-- return false; // Leftover junk
--#endif
-- }
-- if (errno) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<double*>(dest)) = r;
-- return true;
--}
--
--bool PCRE::Arg::parse_float(const char* str, int n, void* dest) {
-- double r;
-- if (!parse_double(str, n, &r)) return false;
-- if (dest == NULL) return true;
-- *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
-- return true;
--}
--
--
--#define DEFINE_INTEGER_PARSERS(name) \
-- bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 10); \
-- } \
-- bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 16); \
-- } \
-- bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 8); \
-- } \
-- bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
-- return parse_##name##_radix(str, n, dest, 0); \
-- }
--
--DEFINE_INTEGER_PARSERS(short);
--DEFINE_INTEGER_PARSERS(ushort);
--DEFINE_INTEGER_PARSERS(int);
--DEFINE_INTEGER_PARSERS(uint);
--DEFINE_INTEGER_PARSERS(long);
--DEFINE_INTEGER_PARSERS(ulong);
--DEFINE_INTEGER_PARSERS(longlong);
--DEFINE_INTEGER_PARSERS(ulonglong);
--
--#undef DEFINE_INTEGER_PARSERS
--
--} // namespace re2
-diff --git a/re2/util/pcre.h b/re2/util/pcre.h
-deleted file mode 100644
-index 4dda95d..0000000
---- a/re2/util/pcre.h
-+++ /dev/null
-@@ -1,679 +0,0 @@
--// Copyright 2003-2010 Google Inc. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// This is a variant of PCRE's pcrecpp.h, originally written at Google.
--// The main changes are the addition of the HitLimit method and
--// compilation as PCRE in namespace re2.
--
--// C++ interface to the pcre regular-expression library. PCRE supports
--// Perl-style regular expressions (with extensions like \d, \w, \s,
--// ...).
--//
--// -----------------------------------------------------------------------
--// REGEXP SYNTAX:
--//
--// This module uses the pcre library and hence supports its syntax
--// for regular expressions:
--//
--// http://www.google.com/search?q=pcre
--//
--// The syntax is pretty similar to Perl's. For those not familiar
--// with Perl's regular expressions, here are some examples of the most
--// commonly used extensions:
--//
--// "hello (\\w+) world" -- \w matches a "word" character
--// "version (\\d+)" -- \d matches a digit
--// "hello\\s+world" -- \s matches any whitespace character
--// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
--// "(?i)hello" -- (?i) turns on case-insensitive matching
--// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
--//
--// -----------------------------------------------------------------------
--// MATCHING INTERFACE:
--//
--// The "FullMatch" operation checks that supplied text matches a
--// supplied pattern exactly.
--//
--// Example: successful match
--// CHECK(PCRE::FullMatch("hello", "h.*o"));
--//
--// Example: unsuccessful match (requires full match):
--// CHECK(!PCRE::FullMatch("hello", "e"));
--//
--// -----------------------------------------------------------------------
--// UTF-8 AND THE MATCHING INTERFACE:
--//
--// By default, pattern and text are plain text, one byte per character.
--// The UTF8 flag, passed to the constructor, causes both pattern
--// and string to be treated as UTF-8 text, still a byte stream but
--// potentially multiple bytes per character. In practice, the text
--// is likelier to be UTF-8 than the pattern, but the match returned
--// may depend on the UTF8 flag, so always use it when matching
--// UTF8 text. E.g., "." will match one byte normally but with UTF8
--// set may match up to three bytes of a multi-byte character.
--//
--// Example:
--// PCRE re(utf8_pattern, PCRE::UTF8);
--// CHECK(PCRE::FullMatch(utf8_string, re));
--//
--// -----------------------------------------------------------------------
--// MATCHING WITH SUB-STRING EXTRACTION:
--//
--// You can supply extra pointer arguments to extract matched subpieces.
--//
--// Example: extracts "ruby" into "s" and 1234 into "i"
--// int i;
--// string s;
--// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
--//
--// Example: fails because string cannot be stored in integer
--// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
--//
--// Example: fails because there aren't enough sub-patterns:
--// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
--//
--// Example: does not try to extract any extra sub-patterns
--// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
--//
--// Example: does not try to extract into NULL
--// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
--//
--// Example: integer overflow causes failure
--// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
--//
--// -----------------------------------------------------------------------
--// PARTIAL MATCHES
--//
--// You can use the "PartialMatch" operation when you want the pattern
--// to match any substring of the text.
--//
--// Example: simple search for a string:
--// CHECK(PCRE::PartialMatch("hello", "ell"));
--//
--// Example: find first number in a string
--// int number;
--// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
--// CHECK_EQ(number, 100);
--//
--// -----------------------------------------------------------------------
--// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS
--//
--// PCRE makes it easy to use any string as a regular expression, without
--// requiring a separate compilation step.
--//
--// If speed is of the essence, you can create a pre-compiled "PCRE"
--// object from the pattern and use it multiple times. If you do so,
--// you can typically parse text faster than with sscanf.
--//
--// Example: precompile pattern for faster matching:
--// PCRE pattern("h.*o");
--// while (ReadLine(&str)) {
--// if (PCRE::FullMatch(str, pattern)) ...;
--// }
--//
--// -----------------------------------------------------------------------
--// SCANNING TEXT INCPCREMENTALLY
--//
--// The "Consume" operation may be useful if you want to repeatedly
--// match regular expressions at the front of a string and skip over
--// them as they match. This requires use of the "StringPiece" type,
--// which represents a sub-range of a real string.
--//
--// Example: read lines of the form "var = value" from a string.
--// string contents = ...; // Fill string somehow
--// StringPiece input(contents); // Wrap a StringPiece around it
--//
--// string var;
--// int value;
--// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
--// ...;
--// }
--//
--// Each successful call to "Consume" will set "var/value", and also
--// advance "input" so it points past the matched text. Note that if the
--// regular expression matches an empty string, input will advance
--// by 0 bytes. If the regular expression being used might match
--// an empty string, the loop body must check for this case and either
--// advance the string or break out of the loop.
--//
--// The "FindAndConsume" operation is similar to "Consume" but does not
--// anchor your match at the beginning of the string. For example, you
--// could extract all words from a string by repeatedly calling
--// PCRE::FindAndConsume(&input, "(\\w+)", &word)
--//
--// -----------------------------------------------------------------------
--// PARSING HEX/OCTAL/C-RADIX NUMBERS
--//
--// By default, if you pass a pointer to a numeric value, the
--// corresponding text is interpreted as a base-10 number. You can
--// instead wrap the pointer with a call to one of the operators Hex(),
--// Octal(), or CRadix() to interpret the text in another base. The
--// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
--// prefixes, but defaults to base-10.
--//
--// Example:
--// int a, b, c, d;
--// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
--// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
--// will leave 64 in a, b, c, and d.
--
--#include "util/util.h"
--#include "re2/stringpiece.h"
--
--#ifdef USEPCRE
--#include <pcre.h>
--namespace re2 {
--const bool UsingPCRE = true;
--} // namespace re2
--#else
--namespace re2 {
--const bool UsingPCRE = false;
--struct pcre;
--struct pcre_extra { int flags, match_limit, match_limit_recursion; };
--#define pcre_free(x) {}
--#define PCRE_EXTRA_MATCH_LIMIT 0
--#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
--#define PCRE_ANCHORED 0
--#define PCRE_NOTEMPTY 0
--#define PCRE_ERROR_NOMATCH 1
--#define PCRE_ERROR_MATCHLIMIT 2
--#define PCRE_ERROR_RECURSIONLIMIT 3
--#define PCRE_INFO_CAPTURECOUNT 0
--#define pcre_compile(a,b,c,d,e) ({ (void)(a); (void)(b); *(c)=""; *(d)=0; (void)(e); ((pcre*)0); })
--#define pcre_exec(a, b, c, d, e, f, g, h) ({ (void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f); (void)(g); (void)(h); 0; })
--#define pcre_fullinfo(a, b, c, d) ({ (void)(a); (void)(b); (void)(c); *(d) = 0; 0; })
--} // namespace re2
--#endif
--
--namespace re2 {
--
--class PCRE_Options;
--
--// Interface for regular expression matching. Also corresponds to a
--// pre-compiled regular expression. An "PCRE" object is safe for
--// concurrent use by multiple threads.
--class PCRE {
-- public:
-- // We convert user-passed pointers into special Arg objects
-- class Arg;
--
-- // Marks end of arg list.
-- // ONLY USE IN OPTIONAL ARG DEFAULTS.
-- // DO NOT PASS EXPLICITLY.
-- static Arg no_more_args;
--
-- // Options are same value as those in pcre. We provide them here
-- // to avoid users needing to include pcre.h and also to isolate
-- // users from pcre should we change the underlying library.
-- // Only those needed by Google programs are exposed here to
-- // avoid collision with options employed internally by regexp.cc
-- // Note that some options have equivalents that can be specified in
-- // the regexp itself. For example, prefixing your regexp with
-- // "(?s)" has the same effect as the PCRE_DOTALL option.
-- enum Option {
-- None = 0x0000,
-- UTF8 = 0x0800, // == PCRE_UTF8
-- EnabledCompileOptions = UTF8,
-- EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
-- };
--
-- // We provide implicit conversions from strings so that users can
-- // pass in a string or a "const char*" wherever an "PCRE" is expected.
-- PCRE(const char* pattern);
-- PCRE(const char* pattern, Option option);
-- PCRE(const string& pattern);
-- PCRE(const string& pattern, Option option);
-- PCRE(const char *pattern, const PCRE_Options& re_option);
-- PCRE(const string& pattern, const PCRE_Options& re_option);
--
-- ~PCRE();
--
-- // The string specification for this PCRE. E.g.
-- // PCRE re("ab*c?d+");
-- // re.pattern(); // "ab*c?d+"
-- const string& pattern() const { return pattern_; }
--
-- // If PCRE could not be created properly, returns an error string.
-- // Else returns the empty string.
-- const string& error() const { return *error_; }
--
-- // Whether the PCRE has hit a match limit during execution.
-- // Not thread safe. Intended only for testing.
-- // If hitting match limits is a problem,
-- // you should be using PCRE2 (re2/re2.h)
-- // instead of checking this flag.
-- bool HitLimit();
-- void ClearHitLimit();
--
-- /***** The useful part: the matching interface *****/
--
-- // Matches "text" against "pattern". If pointer arguments are
-- // supplied, copies matched sub-patterns into them.
-- //
-- // You can pass in a "const char*" or a "string" for "text".
-- // You can pass in a "const char*" or a "string" or a "PCRE" for "pattern".
-- //
-- // The provided pointer arguments can be pointers to any scalar numeric
-- // type, or one of:
-- // string (matched piece is copied to string)
-- // StringPiece (StringPiece is mutated to point to matched piece)
-- // T (where "bool T::ParseFrom(const char*, int)" exists)
-- // (void*)NULL (the corresponding matched sub-pattern is not copied)
-- //
-- // Returns true iff all of the following conditions are satisfied:
-- // a. "text" matches "pattern" exactly
-- // b. The number of matched sub-patterns is >= number of supplied pointers
-- // c. The "i"th argument has a suitable type for holding the
-- // string captured as the "i"th sub-pattern. If you pass in
-- // NULL for the "i"th argument, or pass fewer arguments than
-- // number of sub-patterns, "i"th captured sub-pattern is
-- // ignored.
-- //
-- // CAVEAT: An optional sub-pattern that does not exist in the
-- // matched string is assigned the empty string. Therefore, the
-- // following will return false (because the empty string is not a
-- // valid number):
-- // int number;
-- // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
-- struct FullMatchFunctor {
-- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
-- const Arg& ptr1 = no_more_args,
-- const Arg& ptr2 = no_more_args,
-- const Arg& ptr3 = no_more_args,
-- const Arg& ptr4 = no_more_args,
-- const Arg& ptr5 = no_more_args,
-- const Arg& ptr6 = no_more_args,
-- const Arg& ptr7 = no_more_args,
-- const Arg& ptr8 = no_more_args,
-- const Arg& ptr9 = no_more_args,
-- const Arg& ptr10 = no_more_args,
-- const Arg& ptr11 = no_more_args,
-- const Arg& ptr12 = no_more_args,
-- const Arg& ptr13 = no_more_args,
-- const Arg& ptr14 = no_more_args,
-- const Arg& ptr15 = no_more_args,
-- const Arg& ptr16 = no_more_args) const;
-- };
--
-- static const FullMatchFunctor FullMatch;
--
-- // Exactly like FullMatch(), except that "pattern" is allowed to match
-- // a substring of "text".
-- struct PartialMatchFunctor {
-- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
-- const Arg& ptr1 = no_more_args,
-- const Arg& ptr2 = no_more_args,
-- const Arg& ptr3 = no_more_args,
-- const Arg& ptr4 = no_more_args,
-- const Arg& ptr5 = no_more_args,
-- const Arg& ptr6 = no_more_args,
-- const Arg& ptr7 = no_more_args,
-- const Arg& ptr8 = no_more_args,
-- const Arg& ptr9 = no_more_args,
-- const Arg& ptr10 = no_more_args,
-- const Arg& ptr11 = no_more_args,
-- const Arg& ptr12 = no_more_args,
-- const Arg& ptr13 = no_more_args,
-- const Arg& ptr14 = no_more_args,
-- const Arg& ptr15 = no_more_args,
-- const Arg& ptr16 = no_more_args) const;
-- };
--
-- static const PartialMatchFunctor PartialMatch;
--
-- // Like FullMatch() and PartialMatch(), except that pattern has to
-- // match a prefix of "text", and "input" is advanced past the matched
-- // text. Note: "input" is modified iff this routine returns true.
-- struct ConsumeFunctor {
-- bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
-- const Arg& ptr1 = no_more_args,
-- const Arg& ptr2 = no_more_args,
-- const Arg& ptr3 = no_more_args,
-- const Arg& ptr4 = no_more_args,
-- const Arg& ptr5 = no_more_args,
-- const Arg& ptr6 = no_more_args,
-- const Arg& ptr7 = no_more_args,
-- const Arg& ptr8 = no_more_args,
-- const Arg& ptr9 = no_more_args,
-- const Arg& ptr10 = no_more_args,
-- const Arg& ptr11 = no_more_args,
-- const Arg& ptr12 = no_more_args,
-- const Arg& ptr13 = no_more_args,
-- const Arg& ptr14 = no_more_args,
-- const Arg& ptr15 = no_more_args,
-- const Arg& ptr16 = no_more_args) const;
-- };
--
-- static const ConsumeFunctor Consume;
--
-- // Like Consume(..), but does not anchor the match at the beginning of the
-- // string. That is, "pattern" need not start its match at the beginning of
-- // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
-- // word in "s" and stores it in "word".
-- struct FindAndConsumeFunctor {
-- bool operator ()(StringPiece* input, const PCRE& pattern,
-- const Arg& ptr1 = no_more_args,
-- const Arg& ptr2 = no_more_args,
-- const Arg& ptr3 = no_more_args,
-- const Arg& ptr4 = no_more_args,
-- const Arg& ptr5 = no_more_args,
-- const Arg& ptr6 = no_more_args,
-- const Arg& ptr7 = no_more_args,
-- const Arg& ptr8 = no_more_args,
-- const Arg& ptr9 = no_more_args,
-- const Arg& ptr10 = no_more_args,
-- const Arg& ptr11 = no_more_args,
-- const Arg& ptr12 = no_more_args,
-- const Arg& ptr13 = no_more_args,
-- const Arg& ptr14 = no_more_args,
-- const Arg& ptr15 = no_more_args,
-- const Arg& ptr16 = no_more_args) const;
-- };
--
-- static const FindAndConsumeFunctor FindAndConsume;
--
-- // Replace the first match of "pattern" in "str" with "rewrite".
-- // Within "rewrite", backslash-escaped digits (\1 to \9) can be
-- // used to insert text matching corresponding parenthesized group
-- // from the pattern. \0 in "rewrite" refers to the entire matching
-- // text. E.g.,
-- //
-- // string s = "yabba dabba doo";
-- // CHECK(PCRE::Replace(&s, "b+", "d"));
-- //
-- // will leave "s" containing "yada dabba doo"
-- //
-- // Returns true if the pattern matches and a replacement occurs,
-- // false otherwise.
-- static bool Replace(string *str,
-- const PCRE& pattern,
-- const StringPiece& rewrite);
--
-- // Like Replace(), except replaces all occurrences of the pattern in
-- // the string with the rewrite. Replacements are not subject to
-- // re-matching. E.g.,
-- //
-- // string s = "yabba dabba doo";
-- // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
-- //
-- // will leave "s" containing "yada dada doo"
-- //
-- // Returns the number of replacements made.
-- static int GlobalReplace(string *str,
-- const PCRE& pattern,
-- const StringPiece& rewrite);
--
-- // Like Replace, except that if the pattern matches, "rewrite"
-- // is copied into "out" with substitutions. The non-matching
-- // portions of "text" are ignored.
-- //
-- // Returns true iff a match occurred and the extraction happened
-- // successfully; if no match occurs, the string is left unaffected.
-- static bool Extract(const StringPiece &text,
-- const PCRE& pattern,
-- const StringPiece &rewrite,
-- string *out);
--
-- // Check that the given @p rewrite string is suitable for use with
-- // this PCRE. It checks that:
-- // * The PCRE has enough parenthesized subexpressions to satisfy all
-- // of the \N tokens in @p rewrite, and
-- // * The @p rewrite string doesn't have any syntax errors
-- // ('\' followed by anything besides [0-9] and '\').
-- // Making this test will guarantee that "replace" and "extract"
-- // operations won't LOG(ERROR) or fail because of a bad rewrite
-- // string.
-- // @param rewrite The proposed rewrite string.
-- // @param error An error message is recorded here, iff we return false.
-- // Otherwise, it is unchanged.
-- // @return true, iff @p rewrite is suitable for use with the PCRE.
-- bool CheckRewriteString(const StringPiece& rewrite, string* error) const;
--
-- // Returns a copy of 'unquoted' with all potentially meaningful
-- // regexp characters backslash-escaped. The returned string, used
-- // as a regular expression, will exactly match the original string.
-- // For example,
-- // 1.5-2.0?
-- // becomes:
-- // 1\.5\-2\.0\?
-- static string QuoteMeta(const StringPiece& unquoted);
--
-- /***** Generic matching interface (not so nice to use) *****/
--
-- // Type of match (TODO: Should be restructured as an Option)
-- enum Anchor {
-- UNANCHORED, // No anchoring
-- ANCHOR_START, // Anchor at start only
-- ANCHOR_BOTH, // Anchor at start and end
-- };
--
-- // General matching routine. Stores the length of the match in
-- // "*consumed" if successful.
-- bool DoMatch(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const* args, int n) const;
--
-- // Return the number of capturing subpatterns, or -1 if the
-- // regexp wasn't valid on construction.
-- int NumberOfCapturingGroups() const;
--
-- private:
-- void Init(const char* pattern, Option option, int match_limit,
-- int stack_limit, bool report_errors);
--
-- // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
-- // pairs of integers for the beginning and end positions of matched
-- // text. The first pair corresponds to the entire matched text;
-- // subsequent pairs correspond, in order, to parentheses-captured
-- // matches. Returns the number of pairs (one more than the number of
-- // the last subpattern with a match) if matching was successful
-- // and zero if the match failed.
-- // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
-- // against "foo", "bar", and "baz" respectively.
-- // When matching PCRE("(foo)|hello") against "hello", it will return 1.
-- // But the values for all subpattern are filled in into "vec".
-- int TryMatch(const StringPiece& text,
-- int startpos,
-- Anchor anchor,
-- bool empty_ok,
-- int *vec,
-- int vecsize) const;
--
-- // Append the "rewrite" string, with backslash subsitutions from "text"
-- // and "vec", to string "out".
-- bool Rewrite(string *out,
-- const StringPiece &rewrite,
-- const StringPiece &text,
-- int *vec,
-- int veclen) const;
--
-- // internal implementation for DoMatch
-- bool DoMatchImpl(const StringPiece& text,
-- Anchor anchor,
-- int* consumed,
-- const Arg* const args[],
-- int n,
-- int* vec,
-- int vecsize) const;
--
-- // Compile the regexp for the specified anchoring mode
-- pcre* Compile(Anchor anchor);
--
-- string pattern_;
-- Option options_;
-- pcre* re_full_; // For full matches
-- pcre* re_partial_; // For partial matches
-- const string* error_; // Error indicator (or empty string)
-- bool report_errors_; // Silences error logging if false
-- int match_limit_; // Limit on execution resources
-- int stack_limit_; // Limit on stack resources (bytes)
-- mutable int32_t hit_limit_; // Hit limit during execution (bool)?
-- DISALLOW_EVIL_CONSTRUCTORS(PCRE);
--};
--
--// PCRE_Options allow you to set the PCRE::Options, plus any pcre
--// "extra" options. The only extras are match_limit, which limits
--// the CPU time of a match, and stack_limit, which limits the
--// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
--// that should not cause too many problems in production code.
--// If PCRE hits a limit during a match, it may return a false negative,
--// but (hopefully) it won't crash.
--//
--// NOTE: If you are handling regular expressions specified by
--// (external or internal) users, rather than hard-coded ones,
--// you should be using PCRE2, which uses an alternate implementation
--// that avoids these issues. See http://go/re2quick.
--class PCRE_Options {
-- public:
-- // constructor
-- PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
-- // accessors
-- PCRE::Option option() const { return option_; }
-- void set_option(PCRE::Option option) {
-- option_ = option;
-- }
-- int match_limit() const { return match_limit_; }
-- void set_match_limit(int match_limit) {
-- match_limit_ = match_limit;
-- }
-- int stack_limit() const { return stack_limit_; }
-- void set_stack_limit(int stack_limit) {
-- stack_limit_ = stack_limit;
-- }
--
-- // If the regular expression is malformed, an error message will be printed
-- // iff report_errors() is true. Default: true.
-- bool report_errors() const { return report_errors_; }
-- void set_report_errors(bool report_errors) {
-- report_errors_ = report_errors;
-- }
-- private:
-- PCRE::Option option_;
-- int match_limit_;
-- int stack_limit_;
-- bool report_errors_;
--};
--
--
--/***** Implementation details *****/
--
--// Hex/Octal/Binary?
--
--// Special class for parsing into objects that define a ParseFrom() method
--template <class T>
--class _PCRE_MatchObject {
-- public:
-- static inline bool Parse(const char* str, int n, void* dest) {
-- if (dest == NULL) return true;
-- T* object = reinterpret_cast<T*>(dest);
-- return object->ParseFrom(str, n);
-- }
--};
--
--class PCRE::Arg {
-- public:
-- // Empty constructor so we can declare arrays of PCRE::Arg
-- Arg();
--
-- // Constructor specially designed for NULL arguments
-- Arg(void*);
--
-- typedef bool (*Parser)(const char* str, int n, void* dest);
--
--// Type-specific parsers
--#define MAKE_PARSER(type,name) \
-- Arg(type* p) : arg_(p), parser_(name) { } \
-- Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } \
--
--
-- MAKE_PARSER(char, parse_char);
-- MAKE_PARSER(unsigned char, parse_uchar);
-- MAKE_PARSER(short, parse_short);
-- MAKE_PARSER(unsigned short, parse_ushort);
-- MAKE_PARSER(int, parse_int);
-- MAKE_PARSER(unsigned int, parse_uint);
-- MAKE_PARSER(long, parse_long);
-- MAKE_PARSER(unsigned long, parse_ulong);
-- MAKE_PARSER(long long, parse_longlong);
-- MAKE_PARSER(unsigned long long, parse_ulonglong);
-- MAKE_PARSER(float, parse_float);
-- MAKE_PARSER(double, parse_double);
-- MAKE_PARSER(string, parse_string);
-- MAKE_PARSER(StringPiece, parse_stringpiece);
--
--#undef MAKE_PARSER
--
-- // Generic constructor
-- template <class T> Arg(T*, Parser parser);
-- // Generic constructor template
-- template <class T> Arg(T* p)
-- : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
-- }
--
-- // Parse the data
-- bool Parse(const char* str, int n) const;
--
-- private:
-- void* arg_;
-- Parser parser_;
--
-- static bool parse_null (const char* str, int n, void* dest);
-- static bool parse_char (const char* str, int n, void* dest);
-- static bool parse_uchar (const char* str, int n, void* dest);
-- static bool parse_float (const char* str, int n, void* dest);
-- static bool parse_double (const char* str, int n, void* dest);
-- static bool parse_string (const char* str, int n, void* dest);
-- static bool parse_stringpiece (const char* str, int n, void* dest);
--
--#define DECLARE_INTEGER_PARSER(name) \
-- private: \
-- static bool parse_ ## name(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _radix( \
-- const char* str, int n, void* dest, int radix); \
-- public: \
-- static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
-- static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
--
-- DECLARE_INTEGER_PARSER(short);
-- DECLARE_INTEGER_PARSER(ushort);
-- DECLARE_INTEGER_PARSER(int);
-- DECLARE_INTEGER_PARSER(uint);
-- DECLARE_INTEGER_PARSER(long);
-- DECLARE_INTEGER_PARSER(ulong);
-- DECLARE_INTEGER_PARSER(longlong);
-- DECLARE_INTEGER_PARSER(ulonglong);
--
--#undef DECLARE_INTEGER_PARSER
--};
--
--inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
--inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
--
--inline bool PCRE::Arg::Parse(const char* str, int n) const {
-- return (*parser_)(str, n, arg_);
--}
--
--// This part of the parser, appropriate only for ints, deals with bases
--#define MAKE_INTEGER_PARSER(type, name) \
-- inline PCRE::Arg Hex(type* ptr) { \
-- return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _hex); } \
-- inline PCRE::Arg Octal(type* ptr) { \
-- return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _octal); } \
-- inline PCRE::Arg CRadix(type* ptr) { \
-- return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _cradix); }
--
--MAKE_INTEGER_PARSER(short, short);
--MAKE_INTEGER_PARSER(unsigned short, ushort);
--MAKE_INTEGER_PARSER(int, int);
--MAKE_INTEGER_PARSER(unsigned int, uint);
--MAKE_INTEGER_PARSER(long, long);
--MAKE_INTEGER_PARSER(unsigned long, ulong);
--MAKE_INTEGER_PARSER(long long, longlong);
--MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
--
--#undef MAKE_INTEGER_PARSER
--
--} // namespace re2
-diff --git a/re2/util/random.cc b/re2/util/random.cc
-deleted file mode 100644
-index 49d6195..0000000
---- a/re2/util/random.cc
-+++ /dev/null
-@@ -1,34 +0,0 @@
--// Copyright 2005-2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Modified from Google perftools's tcmalloc_unittest.cc.
--
--#include "util/random.h"
--
--namespace re2 {
--
--int32 ACMRandom::Next() {
-- const int32 M = 2147483647L; // 2^31-1
-- const int32 A = 16807;
-- // In effect, we are computing seed_ = (seed_ * A) % M, where M = 2^31-1
-- uint32 lo = A * (int32)(seed_ & 0xFFFF);
-- uint32 hi = A * (int32)((uint32)seed_ >> 16);
-- lo += (hi & 0x7FFF) << 16;
-- if (lo > M) {
-- lo &= M;
-- ++lo;
-- }
-- lo += hi >> 15;
-- if (lo > M) {
-- lo &= M;
-- ++lo;
-- }
-- return (seed_ = (int32) lo);
--}
--
--int32 ACMRandom::Uniform(int32 n) {
-- return Next() % n;
--}
--
--} // namespace re2
-diff --git a/re2/util/random.h b/re2/util/random.h
-deleted file mode 100644
-index 6c6e701..0000000
---- a/re2/util/random.h
-+++ /dev/null
-@@ -1,29 +0,0 @@
--// Copyright 2005-2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Modified from Google perftools's tcmalloc_unittest.cc.
--
--#ifndef RE2_UTIL_RANDOM_H__
--#define RE2_UTIL_RANDOM_H__
--
--#include "util/util.h"
--
--namespace re2 {
--
--// ACM minimal standard random number generator. (re-entrant.)
--class ACMRandom {
-- public:
-- ACMRandom(int32 seed) : seed_(seed) {}
-- int32 Next();
-- int32 Uniform(int32);
--
-- void Reset(int32 seed) { seed_ = seed; }
--
-- private:
-- int32 seed_;
--};
--
--} // namespace re2
--
--#endif // RE2_UTIL_RANDOM_H__
-diff --git a/re2/util/rune.cc b/re2/util/rune.cc
-deleted file mode 100644
-index 26442b0..0000000
---- a/re2/util/rune.cc
-+++ /dev/null
-@@ -1,258 +0,0 @@
--/*
-- * The authors of this software are Rob Pike and Ken Thompson.
-- * Copyright (c) 2002 by Lucent Technologies.
-- * Permission to use, copy, modify, and distribute this software for any
-- * purpose without fee is hereby granted, provided that this entire notice
-- * is included in all copies of any software which is or includes a copy
-- * or modification of this software and in all copies of the supporting
-- * documentation for such software.
-- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
-- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
-- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
-- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
-- */
--#include <stdarg.h>
--#include <string.h>
--#include "util/utf.h"
--
--namespace re2 {
--
--enum
--{
-- Bit1 = 7,
-- Bitx = 6,
-- Bit2 = 5,
-- Bit3 = 4,
-- Bit4 = 3,
-- Bit5 = 2,
--
-- T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
-- Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
-- T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
-- T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
-- T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
-- T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
--
-- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
-- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
-- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
-- Rune4 = (1<<(Bit4+3*Bitx))-1,
-- /* 0001 1111 1111 1111 1111 1111 */
--
-- Maskx = (1<<Bitx)-1, /* 0011 1111 */
-- Testx = Maskx ^ 0xFF, /* 1100 0000 */
--
-- Bad = Runeerror,
--};
--
--int
--chartorune(Rune *rune, const char *str)
--{
-- int c, c1, c2, c3;
-- long l;
--
-- /*
-- * one character sequence
-- * 00000-0007F => T1
-- */
-- c = *(unsigned char*)str;
-- if(c < Tx) {
-- *rune = c;
-- return 1;
-- }
--
-- /*
-- * two character sequence
-- * 0080-07FF => T2 Tx
-- */
-- c1 = *(unsigned char*)(str+1) ^ Tx;
-- if(c1 & Testx)
-- goto bad;
-- if(c < T3) {
-- if(c < T2)
-- goto bad;
-- l = ((c << Bitx) | c1) & Rune2;
-- if(l <= Rune1)
-- goto bad;
-- *rune = l;
-- return 2;
-- }
--
-- /*
-- * three character sequence
-- * 0800-FFFF => T3 Tx Tx
-- */
-- c2 = *(unsigned char*)(str+2) ^ Tx;
-- if(c2 & Testx)
-- goto bad;
-- if(c < T4) {
-- l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
-- if(l <= Rune2)
-- goto bad;
-- *rune = l;
-- return 3;
-- }
--
-- /*
-- * four character sequence (21-bit value)
-- * 10000-1FFFFF => T4 Tx Tx Tx
-- */
-- c3 = *(unsigned char*)(str+3) ^ Tx;
-- if (c3 & Testx)
-- goto bad;
-- if (c < T5) {
-- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-- if (l <= Rune3)
-- goto bad;
-- *rune = l;
-- return 4;
-- }
--
-- /*
-- * Support for 5-byte or longer UTF-8 would go here, but
-- * since we don't have that, we'll just fall through to bad.
-- */
--
-- /*
-- * bad decoding
-- */
--bad:
-- *rune = Bad;
-- return 1;
--}
--
--int
--runetochar(char *str, const Rune *rune)
--{
-- /* Runes are signed, so convert to unsigned for range check. */
-- unsigned long c;
--
-- /*
-- * one character sequence
-- * 00000-0007F => 00-7F
-- */
-- c = *rune;
-- if(c <= Rune1) {
-- str[0] = c;
-- return 1;
-- }
--
-- /*
-- * two character sequence
-- * 0080-07FF => T2 Tx
-- */
-- if(c <= Rune2) {
-- str[0] = T2 | (c >> 1*Bitx);
-- str[1] = Tx | (c & Maskx);
-- return 2;
-- }
--
-- /*
-- * If the Rune is out of range, convert it to the error rune.
-- * Do this test here because the error rune encodes to three bytes.
-- * Doing it earlier would duplicate work, since an out of range
-- * Rune wouldn't have fit in one or two bytes.
-- */
-- if (c > Runemax)
-- c = Runeerror;
--
-- /*
-- * three character sequence
-- * 0800-FFFF => T3 Tx Tx
-- */
-- if (c <= Rune3) {
-- str[0] = T3 | (c >> 2*Bitx);
-- str[1] = Tx | ((c >> 1*Bitx) & Maskx);
-- str[2] = Tx | (c & Maskx);
-- return 3;
-- }
--
-- /*
-- * four character sequence (21-bit value)
-- * 10000-1FFFFF => T4 Tx Tx Tx
-- */
-- str[0] = T4 | (c >> 3*Bitx);
-- str[1] = Tx | ((c >> 2*Bitx) & Maskx);
-- str[2] = Tx | ((c >> 1*Bitx) & Maskx);
-- str[3] = Tx | (c & Maskx);
-- return 4;
--}
--
--int
--runelen(Rune rune)
--{
-- char str[10];
--
-- return runetochar(str, &rune);
--}
--
--int
--fullrune(const char *str, int n)
--{
-- if (n > 0) {
-- int c = *(unsigned char*)str;
-- if (c < Tx)
-- return 1;
-- if (n > 1) {
-- if (c < T3)
-- return 1;
-- if (n > 2) {
-- if (c < T4 || n > 3)
-- return 1;
-- }
-- }
-- }
-- return 0;
--}
--
--
--int
--utflen(const char *s)
--{
-- int c;
-- long n;
-- Rune rune;
--
-- n = 0;
-- for(;;) {
-- c = *(unsigned char*)s;
-- if(c < Runeself) {
-- if(c == 0)
-- return n;
-- s++;
-- } else
-- s += chartorune(&rune, s);
-- n++;
-- }
-- return 0;
--}
--
--char*
--utfrune(const char *s, Rune c)
--{
-- long c1;
-- Rune r;
-- int n;
--
-- if(c < Runesync) /* not part of utf sequence */
-- return strchr((char*)s, c);
--
-- for(;;) {
-- c1 = *(unsigned char*)s;
-- if(c1 < Runeself) { /* one byte rune */
-- if(c1 == 0)
-- return 0;
-- if(c1 == c)
-- return (char*)s;
-- s++;
-- continue;
-- }
-- n = chartorune(&r, s);
-- if(r == c)
-- return (char*)s;
-- s += n;
-- }
-- return 0;
--}
--
--} // namespace re2
-diff --git a/re2/util/sparse_array.h b/re2/util/sparse_array.h
-deleted file mode 100644
-index c024bed..0000000
---- a/re2/util/sparse_array.h
-+++ /dev/null
-@@ -1,451 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// DESCRIPTION
--//
--// SparseArray<T>(m) is a map from integers in [0, m) to T values.
--// It requires (sizeof(T)+sizeof(int))*m memory, but it provides
--// fast iteration through the elements in the array and fast clearing
--// of the array. The array has a concept of certain elements being
--// uninitialized (having no value).
--//
--// Insertion and deletion are constant time operations.
--//
--// Allocating the array is a constant time operation
--// when memory allocation is a constant time operation.
--//
--// Clearing the array is a constant time operation (unusual!).
--//
--// Iterating through the array is an O(n) operation, where n
--// is the number of items in the array (not O(m)).
--//
--// The array iterator visits entries in the order they were first
--// inserted into the array. It is safe to add items to the array while
--// using an iterator: the iterator will visit indices added to the array
--// during the iteration, but will not re-visit indices whose values
--// change after visiting. Thus SparseArray can be a convenient
--// implementation of a work queue.
--//
--// The SparseArray implementation is NOT thread-safe. It is up to the
--// caller to make sure only one thread is accessing the array. (Typically
--// these arrays are temporary values and used in situations where speed is
--// important.)
--//
--// The SparseArray interface does not present all the usual STL bells and
--// whistles.
--//
--// Implemented with reference to Briggs & Torczon, An Efficient
--// Representation for Sparse Sets, ACM Letters on Programming Languages
--// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
--//
--// Briggs & Torczon popularized this technique, but it had been known
--// long before their paper. They point out that Aho, Hopcroft, and
--// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's
--// 1986 Programming Pearls both hint at the technique in exercises to the
--// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1
--// exercise 8).
--//
--// Briggs & Torczon describe a sparse set implementation. I have
--// trivially generalized it to create a sparse array (actually the original
--// target of the AHU and Bentley exercises).
--
--// IMPLEMENTATION
--//
--// SparseArray uses a vector dense_ and an array sparse_to_dense_, both of
--// size max_size_. At any point, the number of elements in the sparse array is
--// size_.
--//
--// The vector dense_ contains the size_ elements in the sparse array (with
--// their indices),
--// in the order that the elements were first inserted. This array is dense:
--// the size_ pairs are dense_[0] through dense_[size_-1].
--//
--// The array sparse_to_dense_ maps from indices in [0,m) to indices in
--// [0,size_).
--// For indices present in the array, dense_[sparse_to_dense_[i]].index_ == i.
--// For indices not present in the array, sparse_to_dense_ can contain
--// any value at all, perhaps outside the range [0, size_) but perhaps not.
--//
--// The lax requirement on sparse_to_dense_ values makes clearing
--// the array very easy: set size_ to 0. Lookups are slightly more
--// complicated. An index i has a value in the array if and only if:
--// sparse_to_dense_[i] is in [0, size_) AND
--// dense_[sparse_to_dense_[i]].index_ == i.
--// If both these properties hold, only then it is safe to refer to
--// dense_[sparse_to_dense_[i]].value_
--// as the value associated with index i.
--//
--// To insert a new entry, set sparse_to_dense_[i] to size_,
--// initialize dense_[size_], and then increment size_.
--//
--// Deletion of specific values from the array is implemented by
--// swapping dense_[size_-1] and the dense_ being deleted and then
--// updating the appropriate sparse_to_dense_ entries.
--//
--// To make the sparse array as efficient as possible for non-primitive types,
--// elements may or may not be destroyed when they are deleted from the sparse
--// array through a call to erase(), erase_existing() or resize(). They
--// immediately become inaccessible, but they are only guaranteed to be
--// destroyed when the SparseArray destructor is called.
--
--#ifndef RE2_UTIL_SPARSE_ARRAY_H__
--#define RE2_UTIL_SPARSE_ARRAY_H__
--
--#include "util/util.h"
--
--namespace re2 {
--
--template<typename Value>
--class SparseArray {
-- public:
-- SparseArray();
-- SparseArray(int max_size);
-- ~SparseArray();
--
-- // IndexValue pairs: exposed in SparseArray::iterator.
-- class IndexValue;
--
-- typedef IndexValue value_type;
-- typedef typename vector<IndexValue>::iterator iterator;
-- typedef typename vector<IndexValue>::const_iterator const_iterator;
--
-- inline const IndexValue& iv(int i) const;
--
-- // Return the number of entries in the array.
-- int size() const {
-- return size_;
-- }
--
-- // Iterate over the array.
-- iterator begin() {
-- return dense_.begin();
-- }
-- iterator end() {
-- return dense_.begin() + size_;
-- }
--
-- const_iterator begin() const {
-- return dense_.begin();
-- }
-- const_iterator end() const {
-- return dense_.begin() + size_;
-- }
--
-- // Change the maximum size of the array.
-- // Invalidates all iterators.
-- void resize(int max_size);
--
-- // Return the maximum size of the array.
-- // Indices can be in the range [0, max_size).
-- int max_size() const {
-- return max_size_;
-- }
--
-- // Clear the array.
-- void clear() {
-- size_ = 0;
-- }
--
-- // Check whether index i is in the array.
-- inline bool has_index(int i) const;
--
-- // Comparison function for sorting.
-- // Can sort the sparse array so that future iterations
-- // will visit indices in increasing order using
-- // sort(arr.begin(), arr.end(), arr.less);
-- static bool less(const IndexValue& a, const IndexValue& b);
--
-- public:
-- // Set the value at index i to v.
-- inline iterator set(int i, Value v);
--
-- pair<iterator, bool> insert(const value_type& new_value);
--
-- // Returns the value at index i
-- // or defaultv if index i is not initialized in the array.
-- inline Value get(int i, Value defaultv) const;
--
-- iterator find(int i);
--
-- const_iterator find(int i) const;
--
-- // Change the value at index i to v.
-- // Fast but unsafe: only use if has_index(i) is true.
-- inline iterator set_existing(int i, Value v);
--
-- // Set the value at the new index i to v.
-- // Fast but unsafe: only use if has_index(i) is false.
-- inline iterator set_new(int i, Value v);
--
-- // Get the value at index i from the array..
-- // Fast but unsafe: only use if has_index(i) is true.
-- inline Value get_existing(int i) const;
--
-- // Erasing items from the array during iteration is in general
-- // NOT safe. There is one special case, which is that the current
-- // index-value pair can be erased as long as the iterator is then
-- // checked for being at the end before being incremented.
-- // For example:
-- //
-- // for (i = m.begin(); i != m.end(); ++i) {
-- // if (ShouldErase(i->index(), i->value())) {
-- // m.erase(i->index());
-- // --i;
-- // }
-- // }
-- //
-- // Except in the specific case just described, elements must
-- // not be erased from the array (including clearing the array)
-- // while iterators are walking over the array. Otherwise,
-- // the iterators could walk past the end of the array.
--
-- // Erases the element at index i from the array.
-- inline void erase(int i);
--
-- // Erases the element at index i from the array.
-- // Fast but unsafe: only use if has_index(i) is true.
-- inline void erase_existing(int i);
--
-- private:
-- // Add the index i to the array.
-- // Only use if has_index(i) is known to be false.
-- // Since it doesn't set the value associated with i,
-- // this function is private, only intended as a helper
-- // for other methods.
-- inline void create_index(int i);
--
-- // In debug mode, verify that some invariant properties of the class
-- // are being maintained. This is called at the end of the constructor
-- // and at the beginning and end of all public non-const member functions.
-- inline void DebugCheckInvariants() const;
--
-- int size_;
-- int max_size_;
-- int* sparse_to_dense_;
-- vector<IndexValue> dense_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(SparseArray);
--};
--
--template<typename Value>
--SparseArray<Value>::SparseArray()
-- : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_() {}
--
--// IndexValue pairs: exposed in SparseArray::iterator.
--template<typename Value>
--class SparseArray<Value>::IndexValue {
-- friend class SparseArray;
-- public:
-- typedef int first_type;
-- typedef Value second_type;
--
-- IndexValue() {}
-- IndexValue(int index, const Value& value) : second(value), index_(index) {}
--
-- int index() const { return index_; }
-- Value value() const { return second; }
--
-- // Provide the data in the 'second' member so that the utilities
-- // in map-util work.
-- Value second;
--
-- private:
-- int index_;
--};
--
--template<typename Value>
--const typename SparseArray<Value>::IndexValue&
--SparseArray<Value>::iv(int i) const {
-- DCHECK_GE(i, 0);
-- DCHECK_LT(i, size_);
-- return dense_[i];
--}
--
--// Change the maximum size of the array.
--// Invalidates all iterators.
--template<typename Value>
--void SparseArray<Value>::resize(int new_max_size) {
-- DebugCheckInvariants();
-- if (new_max_size > max_size_) {
-- int* a = new int[new_max_size];
-- if (sparse_to_dense_) {
-- memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
-- // Don't need to zero the memory but appease Valgrind.
-- if (RunningOnValgrind()) {
-- for (int i = max_size_; i < new_max_size; i++)
-- a[i] = 0xababababU;
-- }
-- delete[] sparse_to_dense_;
-- }
-- sparse_to_dense_ = a;
--
-- dense_.resize(new_max_size);
-- }
-- max_size_ = new_max_size;
-- if (size_ > max_size_)
-- size_ = max_size_;
-- DebugCheckInvariants();
--}
--
--// Check whether index i is in the array.
--template<typename Value>
--bool SparseArray<Value>::has_index(int i) const {
-- DCHECK_GE(i, 0);
-- DCHECK_LT(i, max_size_);
-- if (static_cast<uint>(i) >= max_size_) {
-- return false;
-- }
-- // Unsigned comparison avoids checking sparse_to_dense_[i] < 0.
-- return (uint)sparse_to_dense_[i] < (uint)size_ &&
-- dense_[sparse_to_dense_[i]].index_ == i;
--}
--
--// Set the value at index i to v.
--template<typename Value>
--typename SparseArray<Value>::iterator SparseArray<Value>::set(int i, Value v) {
-- DebugCheckInvariants();
-- if (static_cast<uint>(i) >= max_size_) {
-- // Semantically, end() would be better here, but we already know
-- // the user did something stupid, so begin() insulates them from
-- // dereferencing an invalid pointer.
-- return begin();
-- }
-- if (!has_index(i))
-- create_index(i);
-- return set_existing(i, v);
--}
--
--template<typename Value>
--pair<typename SparseArray<Value>::iterator, bool> SparseArray<Value>::insert(
-- const value_type& new_value) {
-- DebugCheckInvariants();
-- pair<typename SparseArray<Value>::iterator, bool> p;
-- if (has_index(new_value.index_)) {
-- p = make_pair(dense_.begin() + sparse_to_dense_[new_value.index_], false);
-- } else {
-- p = make_pair(set_new(new_value.index_, new_value.second), true);
-- }
-- DebugCheckInvariants();
-- return p;
--}
--
--template<typename Value>
--Value SparseArray<Value>::get(int i, Value defaultv) const {
-- if (!has_index(i))
-- return defaultv;
-- return get_existing(i);
--}
--
--template<typename Value>
--typename SparseArray<Value>::iterator SparseArray<Value>::find(int i) {
-- if (has_index(i))
-- return dense_.begin() + sparse_to_dense_[i];
-- return end();
--}
--
--template<typename Value>
--typename SparseArray<Value>::const_iterator
--SparseArray<Value>::find(int i) const {
-- if (has_index(i)) {
-- return dense_.begin() + sparse_to_dense_[i];
-- }
-- return end();
--}
--
--template<typename Value>
--typename SparseArray<Value>::iterator
--SparseArray<Value>::set_existing(int i, Value v) {
-- DebugCheckInvariants();
-- DCHECK(has_index(i));
-- dense_[sparse_to_dense_[i]].second = v;
-- DebugCheckInvariants();
-- return dense_.begin() + sparse_to_dense_[i];
--}
--
--template<typename Value>
--typename SparseArray<Value>::iterator
--SparseArray<Value>::set_new(int i, Value v) {
-- DebugCheckInvariants();
-- if (static_cast<uint>(i) >= max_size_) {
-- // Semantically, end() would be better here, but we already know
-- // the user did something stupid, so begin() insulates them from
-- // dereferencing an invalid pointer.
-- return begin();
-- }
-- DCHECK(!has_index(i));
-- create_index(i);
-- return set_existing(i, v);
--}
--
--template<typename Value>
--Value SparseArray<Value>::get_existing(int i) const {
-- DCHECK(has_index(i));
-- return dense_[sparse_to_dense_[i]].second;
--}
--
--template<typename Value>
--void SparseArray<Value>::erase(int i) {
-- DebugCheckInvariants();
-- if (has_index(i))
-- erase_existing(i);
-- DebugCheckInvariants();
--}
--
--template<typename Value>
--void SparseArray<Value>::erase_existing(int i) {
-- DebugCheckInvariants();
-- DCHECK(has_index(i));
-- int di = sparse_to_dense_[i];
-- if (di < size_ - 1) {
-- dense_[di] = dense_[size_ - 1];
-- sparse_to_dense_[dense_[di].index_] = di;
-- }
-- size_--;
-- DebugCheckInvariants();
--}
--
--template<typename Value>
--void SparseArray<Value>::create_index(int i) {
-- DCHECK(!has_index(i));
-- DCHECK_LT(size_, max_size_);
-- sparse_to_dense_[i] = size_;
-- dense_[size_].index_ = i;
-- size_++;
--}
--
--template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
-- max_size_ = max_size;
-- sparse_to_dense_ = new int[max_size];
-- dense_.resize(max_size);
-- // Don't need to zero the new memory, but appease Valgrind.
-- if (RunningOnValgrind()) {
-- for (int i = 0; i < max_size; i++) {
-- sparse_to_dense_[i] = 0xababababU;
-- dense_[i].index_ = 0xababababU;
-- }
-- }
-- size_ = 0;
-- DebugCheckInvariants();
--}
--
--template<typename Value> SparseArray<Value>::~SparseArray() {
-- DebugCheckInvariants();
-- delete[] sparse_to_dense_;
--}
--
--template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
-- DCHECK_LE(0, size_);
-- DCHECK_LE(size_, max_size_);
-- DCHECK(size_ == 0 || sparse_to_dense_ != NULL);
--}
--
--// Comparison function for sorting.
--template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
-- const IndexValue& b) {
-- return a.index_ < b.index_;
--}
--
--} // namespace re2
--
--#endif // RE2_UTIL_SPARSE_ARRAY_H__
-diff --git a/re2/util/sparse_array_test.cc b/re2/util/sparse_array_test.cc
-deleted file mode 100644
-index bc7a19f..0000000
---- a/re2/util/sparse_array_test.cc
-+++ /dev/null
-@@ -1,150 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// Simple tests that SparseArray behaves.
--
--#include "util/util.h"
--#include "utest/utest.h"
--
--namespace re2 {
--
--static const string kNotFound = "NOT FOUND";
--
--TEST(SparseArray, BasicOperations) {
-- static const int n = 50;
-- SparseArray<int> set(n);
--
-- int order[n];
-- int value[n];
-- for (int i = 0; i < n; i++)
-- order[i] = i;
-- for (int i = 0; i < n; i++)
-- value[i] = rand()%1000 + 1;
-- for (int i = 1; i < n; i++) {
-- int j = rand()%i;
-- int t = order[i];
-- order[i] = order[j];
-- order[j] = t;
-- }
--
-- for (int i = 0;; i++) {
-- for (int j = 0; j < i; j++) {
-- ASSERT_TRUE(set.has_index(order[j]));
-- ASSERT_EQ(value[order[j]], set.get(order[j], -1));
-- }
-- if (i >= n)
-- break;
-- for (int j = i; j < n; j++)
-- ASSERT_FALSE(set.has_index(order[j]));
-- set.set(order[i], value[order[i]]);
-- }
--
-- int nn = 0;
-- for (SparseArray<int>::iterator i = set.begin(); i != set.end(); ++i) {
-- ASSERT_EQ(order[nn++], i->index());
-- ASSERT_EQ(value[i->index()], i->value());
-- }
-- ASSERT_EQ(nn, n);
--
-- set.clear();
-- for (int i = 0; i < n; i++)
-- ASSERT_FALSE(set.has_index(i));
--
-- ASSERT_EQ(0, set.size());
-- ASSERT_EQ(0, distance(set.begin(), set.end()));
--}
--
--class SparseArrayStringTest : public testing::Test {
-- protected:
-- SparseArrayStringTest()
-- : str_map_(10) {
-- InsertOrUpdate(&str_map_, 1, "a");
-- InsertOrUpdate(&str_map_, 5, "b");
-- InsertOrUpdate(&str_map_, 2, "c");
-- InsertOrUpdate(&str_map_, 7, "d");
-- }
--
-- SparseArray<string> str_map_;
-- typedef SparseArray<string>::iterator iterator;
--};
--
--TEST_F(SparseArrayStringTest, FindGetsPresentElement) {
-- iterator it = str_map_.find(2);
-- ASSERT_TRUE(str_map_.end() != it);
-- EXPECT_EQ("c", it->second);
--}
--
--TEST_F(SparseArrayStringTest, FindDoesNotFindAbsentElement) {
-- iterator it = str_map_.find(3);
-- ASSERT_TRUE(str_map_.end() == it);
--}
--
--TEST_F(SparseArrayStringTest, ContainsKey) {
-- EXPECT_TRUE(ContainsKey(str_map_, 1));
-- EXPECT_TRUE(ContainsKey(str_map_, 2));
-- EXPECT_FALSE(ContainsKey(str_map_, 3));
--}
--
--TEST_F(SparseArrayStringTest, InsertIfNotPresent) {
-- EXPECT_FALSE(ContainsKey(str_map_, 3));
-- EXPECT_TRUE(InsertIfNotPresent(&str_map_, 3, "r"));
-- EXPECT_EQ("r", FindWithDefault(str_map_, 3, kNotFound));
-- EXPECT_FALSE(InsertIfNotPresent(&str_map_, 3, "other value"));
-- EXPECT_EQ("r", FindWithDefault(str_map_, 3, kNotFound));
--}
--
--TEST(SparseArrayTest, Erase) {
-- SparseArray<string> str_map(5);
-- str_map.set(1, "a");
-- str_map.set(2, "b");
-- EXPECT_EQ("a", FindWithDefault(str_map, 1, kNotFound));
-- EXPECT_EQ("b", FindWithDefault(str_map, 2, kNotFound));
-- str_map.erase(1);
-- EXPECT_EQ("NOT FOUND", FindWithDefault(str_map, 1, kNotFound));
-- EXPECT_EQ("b", FindWithDefault(str_map, 2, kNotFound));
--}
--
--typedef SparseArrayStringTest SparseArrayStringSurvivesInvalidIndexTest;
--// TODO(jyasskin): Cover invalid arguments to every method.
--
--TEST_F(SparseArrayStringSurvivesInvalidIndexTest, SetNegative) {
-- EXPECT_DEBUG_DEATH(str_map_.set(-123456789, "hi"),
-- "\\(jyasskin\\) Illegal index -123456789 passed to"
-- " SparseArray\\(10\\).set\\(\\).");
-- EXPECT_EQ(4, str_map_.size());
--}
--
--TEST_F(SparseArrayStringSurvivesInvalidIndexTest, SetTooBig) {
-- EXPECT_DEBUG_DEATH(str_map_.set(12345678, "hi"),
-- "\\(jyasskin\\) Illegal index 12345678 passed to"
-- " SparseArray\\(10\\).set\\(\\).");
-- EXPECT_EQ(4, str_map_.size());
--}
--
--TEST_F(SparseArrayStringSurvivesInvalidIndexTest, SetNew_Negative) {
-- EXPECT_DEBUG_DEATH(str_map_.set_new(-123456789, "hi"),
-- "\\(jyasskin\\) Illegal index -123456789 passed to"
-- " SparseArray\\(10\\).set_new\\(\\).");
-- EXPECT_EQ(4, str_map_.size());
--}
--
--TEST_F(SparseArrayStringSurvivesInvalidIndexTest, SetNew_Existing) {
-- EXPECT_DEBUG_DEATH({
-- str_map_.set_new(2, "hi");
-- EXPECT_EQ("hi", FindWithDefault(str_map_, 2, kNotFound));
--
-- // The old value for 2 is still present, but can never be removed.
-- // This risks crashing later, if the map fills up.
-- EXPECT_EQ(5, str_map_.size());
-- }, "Check failed: !has_index\\(i\\)");
--}
--
--TEST_F(SparseArrayStringSurvivesInvalidIndexTest, SetNew_TooBig) {
-- EXPECT_DEBUG_DEATH(str_map_.set_new(12345678, "hi"),
-- "\\(jyasskin\\) Illegal index 12345678 passed to"
-- " SparseArray\\(10\\).set_new\\(\\).");
-- EXPECT_EQ(4, str_map_.size());
--}
--
--} // namespace re2
-diff --git a/re2/util/sparse_set.h b/re2/util/sparse_set.h
-deleted file mode 100644
-index 9cb5753..0000000
---- a/re2/util/sparse_set.h
-+++ /dev/null
-@@ -1,177 +0,0 @@
--// Copyright 2006 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--// DESCRIPTION
--//
--// SparseSet<T>(m) is a set of integers in [0, m).
--// It requires sizeof(int)*m memory, but it provides
--// fast iteration through the elements in the set and fast clearing
--// of the set.
--//
--// Insertion and deletion are constant time operations.
--//
--// Allocating the set is a constant time operation
--// when memory allocation is a constant time operation.
--//
--// Clearing the set is a constant time operation (unusual!).
--//
--// Iterating through the set is an O(n) operation, where n
--// is the number of items in the set (not O(m)).
--//
--// The set iterator visits entries in the order they were first
--// inserted into the array. It is safe to add items to the set while
--// using an iterator: the iterator will visit indices added to the set
--// during the iteration, but will not re-visit indices whose values
--// change after visiting. Thus SparseSet can be a convenient
--// implementation of a work queue.
--//
--// The SparseSet implementation is NOT thread-safe. It is up to the
--// caller to make sure only one thread is accessing the set. (Typically
--// these sets are temporary values and used in situations where speed is
--// important.)
--//
--// The SparseSet interface does not present all the usual STL bells and
--// whistles.
--//
--// Implemented with reference to Briggs & Torczon, An Efficient
--// Representation for Sparse Sets, ACM Letters on Programming Languages
--// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
--//
--// For a generalization to sparse array, see sparse_array.h.
--
--// IMPLEMENTATION
--//
--// See sparse_array.h for implementation details
--
--#ifndef RE2_UTIL_SPARSE_SET_H__
--#define RE2_UTIL_SPARSE_SET_H__
--
--#include "util/util.h"
--
--namespace re2 {
--
--class SparseSet {
-- public:
-- SparseSet()
-- : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL) {}
--
-- SparseSet(int max_size) {
-- max_size_ = max_size;
-- sparse_to_dense_ = new int[max_size];
-- dense_ = new int[max_size];
-- // Don't need to zero the memory, but do so anyway
-- // to appease Valgrind.
-- if (RunningOnValgrind()) {
-- for (int i = 0; i < max_size; i++) {
-- dense_[i] = 0xababababU;
-- sparse_to_dense_[i] = 0xababababU;
-- }
-- }
-- size_ = 0;
-- }
--
-- ~SparseSet() {
-- delete[] sparse_to_dense_;
-- delete[] dense_;
-- }
--
-- typedef int* iterator;
-- typedef const int* const_iterator;
--
-- int size() const { return size_; }
-- iterator begin() { return dense_; }
-- iterator end() { return dense_ + size_; }
-- const_iterator begin() const { return dense_; }
-- const_iterator end() const { return dense_ + size_; }
--
-- // Change the maximum size of the array.
-- // Invalidates all iterators.
-- void resize(int new_max_size) {
-- if (size_ > new_max_size)
-- size_ = new_max_size;
-- if (new_max_size > max_size_) {
-- int* a = new int[new_max_size];
-- if (sparse_to_dense_) {
-- memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
-- if (RunningOnValgrind()) {
-- for (int i = max_size_; i < new_max_size; i++)
-- a[i] = 0xababababU;
-- }
-- delete[] sparse_to_dense_;
-- }
-- sparse_to_dense_ = a;
--
-- a = new int[new_max_size];
-- if (dense_) {
-- memmove(a, dense_, size_*sizeof a[0]);
-- if (RunningOnValgrind()) {
-- for (int i = size_; i < new_max_size; i++)
-- a[i] = 0xababababU;
-- }
-- delete[] dense_;
-- }
-- dense_ = a;
-- }
-- max_size_ = new_max_size;
-- }
--
-- // Return the maximum size of the array.
-- // Indices can be in the range [0, max_size).
-- int max_size() const { return max_size_; }
--
-- // Clear the array.
-- void clear() { size_ = 0; }
--
-- // Check whether i is in the array.
-- bool contains(int i) const {
-- DCHECK_GE(i, 0);
-- DCHECK_LT(i, max_size_);
-- if (static_cast<uint>(i) >= max_size_) {
-- return false;
-- }
-- // Unsigned comparison avoids checking sparse_to_dense_[i] < 0.
-- return (uint)sparse_to_dense_[i] < (uint)size_ &&
-- dense_[sparse_to_dense_[i]] == i;
-- }
--
-- // Adds i to the set.
-- void insert(int i) {
-- if (!contains(i))
-- insert_new(i);
-- }
--
-- // Set the value at the new index i to v.
-- // Fast but unsafe: only use if contains(i) is false.
-- void insert_new(int i) {
-- if (static_cast<uint>(i) >= max_size_) {
-- // Semantically, end() would be better here, but we already know
-- // the user did something stupid, so begin() insulates them from
-- // dereferencing an invalid pointer.
-- return;
-- }
-- DCHECK(!contains(i));
-- DCHECK_LT(size_, max_size_);
-- sparse_to_dense_[i] = size_;
-- dense_[size_] = i;
-- size_++;
-- }
--
-- // Comparison function for sorting.
-- // Can sort the sparse array so that future iterations
-- // will visit indices in increasing order using
-- // sort(arr.begin(), arr.end(), arr.less);
-- static bool less(int a, int b) { return a < b; }
--
-- private:
-- int size_;
-- int max_size_;
-- int* sparse_to_dense_;
-- int* dense_;
--
-- DISALLOW_EVIL_CONSTRUCTORS(SparseSet);
--};
--
--} // namespace re2
--
--#endif // RE2_UTIL_SPARSE_SET_H__
-diff --git a/re2/util/stringpiece.cc b/re2/util/stringpiece.cc
-deleted file mode 100644
-index 37895b0..0000000
---- a/re2/util/stringpiece.cc
-+++ /dev/null
-@@ -1,87 +0,0 @@
--// Copyright 2004 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "re2/stringpiece.h"
--#include "util/util.h"
--
--using re2::StringPiece;
--
--std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
-- o.write(piece.data(), piece.size());
-- return o;
--}
--
--bool StringPiece::_equal(const StringPiece& x, const StringPiece& y) {
-- int len = x.size();
-- if (len != y.size()) {
-- return false;
-- }
-- const char* p = x.data();
-- const char* p2 = y.data();
-- // Test last byte in case strings share large common prefix
-- if ((len > 0) && (p[len-1] != p2[len-1])) return false;
-- const char* p_limit = p + len;
-- for (; p < p_limit; p++, p2++) {
-- if (*p != *p2)
-- return false;
-- }
-- return true;
--}
--
--void StringPiece::CopyToString(string* target) const {
-- target->assign(ptr_, length_);
--}
--
--int StringPiece::copy(char* buf, size_type n, size_type pos) const {
-- int ret = min(length_ - pos, n);
-- memcpy(buf, ptr_ + pos, ret);
-- return ret;
--}
--
--int StringPiece::find(const StringPiece& s, size_type pos) const {
-- if (length_ < 0 || pos > static_cast<size_type>(length_))
-- return npos;
--
-- const char* result = std::search(ptr_ + pos, ptr_ + length_,
-- s.ptr_, s.ptr_ + s.length_);
-- const size_type xpos = result - ptr_;
-- return xpos + s.length_ <= length_ ? xpos : npos;
--}
--
--int StringPiece::find(char c, size_type pos) const {
-- if (length_ <= 0 || pos >= static_cast<size_type>(length_)) {
-- return npos;
-- }
-- const char* result = std::find(ptr_ + pos, ptr_ + length_, c);
-- return result != ptr_ + length_ ? result - ptr_ : npos;
--}
--
--int StringPiece::rfind(const StringPiece& s, size_type pos) const {
-- if (length_ < s.length_) return npos;
-- const size_t ulen = length_;
-- if (s.length_ == 0) return min(ulen, pos);
--
-- const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_;
-- const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_);
-- return result != last ? result - ptr_ : npos;
--}
--
--int StringPiece::rfind(char c, size_type pos) const {
-- if (length_ <= 0) return npos;
-- for (int i = min(pos, static_cast<size_type>(length_ - 1));
-- i >= 0; --i) {
-- if (ptr_[i] == c) {
-- return i;
-- }
-- }
-- return npos;
--}
--
--StringPiece StringPiece::substr(size_type pos, size_type n) const {
-- if (pos > length_) pos = length_;
-- if (n > length_ - pos) n = length_ - pos;
-- return StringPiece(ptr_ + pos, n);
--}
--
--const StringPiece::size_type StringPiece::npos = size_type(-1);
-diff --git a/re2/util/stringprintf.cc b/re2/util/stringprintf.cc
-deleted file mode 100644
-index c908181..0000000
---- a/re2/util/stringprintf.cc
-+++ /dev/null
-@@ -1,78 +0,0 @@
--// Copyright 2002 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--
--namespace re2 {
--
--static void StringAppendV(string* dst, const char* format, va_list ap) {
-- // First try with a small fixed size buffer
-- char space[1024];
--
-- // It's possible for methods that use a va_list to invalidate
-- // the data in it upon use. The fix is to make a copy
-- // of the structure before using it and use that copy instead.
-- va_list backup_ap;
-- va_copy(backup_ap, ap);
-- int result = vsnprintf(space, sizeof(space), format, backup_ap);
-- va_end(backup_ap);
--
-- if ((result >= 0) && (result < sizeof(space))) {
-- // It fit
-- dst->append(space, result);
-- return;
-- }
--
-- // Repeatedly increase buffer size until it fits
-- int length = sizeof(space);
-- while (true) {
-- if (result < 0) {
-- // Older behavior: just try doubling the buffer size
-- length *= 2;
-- } else {
-- // We need exactly "result+1" characters
-- length = result+1;
-- }
-- char* buf = new char[length];
--
-- // Restore the va_list before we use it again
-- va_copy(backup_ap, ap);
-- result = vsnprintf(buf, length, format, backup_ap);
-- va_end(backup_ap);
--
-- if ((result >= 0) && (result < length)) {
-- // It fit
-- dst->append(buf, result);
-- delete[] buf;
-- return;
-- }
-- delete[] buf;
-- }
--}
--
--string StringPrintf(const char* format, ...) {
-- va_list ap;
-- va_start(ap, format);
-- string result;
-- StringAppendV(&result, format, ap);
-- va_end(ap);
-- return result;
--}
--
--void SStringPrintf(string* dst, const char* format, ...) {
-- va_list ap;
-- va_start(ap, format);
-- dst->clear();
-- StringAppendV(dst, format, ap);
-- va_end(ap);
--}
--
--void StringAppendF(string* dst, const char* format, ...) {
-- va_list ap;
-- va_start(ap, format);
-- StringAppendV(dst, format, ap);
-- va_end(ap);
--}
--
--} // namespace re2
-diff --git a/re2/util/strutil.cc b/re2/util/strutil.cc
-deleted file mode 100644
-index 6ab79b3..0000000
---- a/re2/util/strutil.cc
-+++ /dev/null
-@@ -1,97 +0,0 @@
--// Copyright 1999-2005 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--#include "re2/stringpiece.h"
--
--namespace re2 {
--
--// ----------------------------------------------------------------------
--// CEscapeString()
--// Copies 'src' to 'dest', escaping dangerous characters using
--// C-style escape sequences. 'src' and 'dest' should not overlap.
--// Returns the number of bytes written to 'dest' (not including the \0)
--// or -1 if there was insufficient space.
--// ----------------------------------------------------------------------
--int CEscapeString(const char* src, int src_len, char* dest,
-- int dest_len) {
-- const char* src_end = src + src_len;
-- int used = 0;
--
-- for (; src < src_end; src++) {
-- if (dest_len - used < 2) // Need space for two letter escape
-- return -1;
--
-- unsigned char c = *src;
-- switch (c) {
-- case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break;
-- case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break;
-- case '\t': dest[used++] = '\\'; dest[used++] = 't'; break;
-- case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
-- case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
-- case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
-- default:
-- // Note that if we emit \xNN and the src character after that is a hex
-- // digit then that digit must be escaped too to prevent it being
-- // interpreted as part of the character code by C.
-- if (c < ' ' || c > '~') {
-- if (dest_len - used < 4) // need space for 4 letter escape
-- return -1;
-- sprintf(dest + used, "\\%03o", c);
-- used += 4;
-- } else {
-- dest[used++] = c; break;
-- }
-- }
-- }
--
-- if (dest_len - used < 1) // make sure that there is room for \0
-- return -1;
--
-- dest[used] = '\0'; // doesn't count towards return value though
-- return used;
--}
--
--
--// ----------------------------------------------------------------------
--// CEscape()
--// Copies 'src' to result, escaping dangerous characters using
--// C-style escape sequences. 'src' and 'dest' should not overlap.
--// ----------------------------------------------------------------------
--string CEscape(const StringPiece& src) {
-- const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
-- char* dest = new char[dest_length];
-- const int len = CEscapeString(src.data(), src.size(),
-- dest, dest_length);
-- string s = string(dest, len);
-- delete[] dest;
-- return s;
--}
--
--string PrefixSuccessor(const StringPiece& prefix) {
-- // We can increment the last character in the string and be done
-- // unless that character is 255, in which case we have to erase the
-- // last character and increment the previous character, unless that
-- // is 255, etc. If the string is empty or consists entirely of
-- // 255's, we just return the empty string.
-- bool done = false;
-- string limit(prefix.data(), prefix.size());
-- int index = limit.length() - 1;
-- while (!done && index >= 0) {
-- if ((limit[index]&255) == 255) {
-- limit.erase(index);
-- index--;
-- } else {
-- limit[index]++;
-- done = true;
-- }
-- }
-- if (!done) {
-- return "";
-- } else {
-- return limit;
-- }
--}
--
--} // namespace re2
-diff --git a/re2/util/test.cc b/re2/util/test.cc
-deleted file mode 100644
-index 0644829..0000000
---- a/re2/util/test.cc
-+++ /dev/null
-@@ -1,39 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <stdio.h>
--#include <sys/resource.h>
--#include "util/test.h"
--
--DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
--
--struct Test {
-- void (*fn)(void);
-- const char *name;
--};
--
--static Test tests[10000];
--static int ntests;
--
--void RegisterTest(void (*fn)(void), const char *name) {
-- tests[ntests].fn = fn;
-- tests[ntests++].name = name;
--}
--
--namespace re2 {
--int64 VirtualProcessSize() {
-- struct rusage ru;
-- getrusage(RUSAGE_SELF, &ru);
-- return (int64)ru.ru_maxrss*1024;
--}
--} // namespace re2
--
--int main(int argc, char **argv) {
-- for (int i = 0; i < ntests; i++) {
-- printf("%s\n", tests[i].name);
-- tests[i].fn();
-- }
-- printf("PASS\n");
-- return 0;
--}
-diff --git a/re2/util/test.h b/re2/util/test.h
-deleted file mode 100644
-index 0f93865..0000000
---- a/re2/util/test.h
-+++ /dev/null
-@@ -1,57 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_UTIL_TEST_H__
--#define RE2_UTIL_TEST_H__
--
--#include "util/util.h"
--#include "util/flags.h"
--
--#define TEST(x, y) \
-- void x##y(void); \
-- TestRegisterer r##x##y(x##y, # x "." # y); \
-- void x##y(void)
--
--void RegisterTest(void (*)(void), const char*);
--
--class TestRegisterer {
-- public:
-- TestRegisterer(void (*fn)(void), const char *s) {
-- RegisterTest(fn, s);
-- }
--};
--
--// TODO(rsc): Do a better job.
--#define EXPECT_EQ CHECK_EQ
--#define EXPECT_TRUE CHECK
--#define EXPECT_LT CHECK_LT
--#define EXPECT_GT CHECK_GT
--#define EXPECT_LE CHECK_LE
--#define EXPECT_GE CHECK_GE
--#define EXPECT_FALSE(x) CHECK(!(x))
--
--#define ARRAYSIZE arraysize
--
--#define EXPECT_TRUE_M(x, y) CHECK(x) << (y)
--#define EXPECT_FALSE_M(x, y) CHECK(!(x)) << (y)
--#define ASSERT_TRUE_M(x, y) CHECK(x) << (y)
--#define ASSERT_EQUALS(x, y) CHECK_EQ(x, y)
--
--const bool UsingMallocCounter = false;
--namespace testing {
--class MallocCounter {
-- public:
-- MallocCounter(int x) { }
-- static const int THIS_THREAD_ONLY = 0;
-- long long HeapGrowth() { return 0; }
-- long long PeakHeapGrowth() { return 0; }
-- void Reset() { }
--};
--} // namespace testing
--
--namespace re2 {
--int64 VirtualProcessSize();
--} // namespace re2
--
--#endif // RE2_UTIL_TEST_H__
-diff --git a/re2/util/thread.cc b/re2/util/thread.cc
-deleted file mode 100644
-index 7349991..0000000
---- a/re2/util/thread.cc
-+++ /dev/null
-@@ -1,44 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include <pthread.h>
--
--#include "util/util.h"
--#include "util/thread.h"
--
--Thread::Thread() {
-- pid_ = 0;
-- running_ = 0;
-- joinable_ = 0;
--}
--
--Thread::~Thread() {
--}
--
--void *startThread(void *v) {
-- Thread* t = (Thread*)v;
-- t->Run();
-- return 0;
--}
--
--void Thread::Start() {
-- CHECK(!running_);
-- pthread_create(&pid_, 0, startThread, this);
-- running_ = true;
-- if (!joinable_)
-- pthread_detach(pid_);
--}
--
--void Thread::Join() {
-- CHECK(running_);
-- CHECK(joinable_);
-- void *val;
-- pthread_join(pid_, &val);
-- running_ = 0;
--}
--
--void Thread::SetJoinable(bool j) {
-- CHECK(!running_);
-- joinable_ = j;
--}
-diff --git a/re2/util/thread.h b/re2/util/thread.h
-deleted file mode 100644
-index b9610e0..0000000
---- a/re2/util/thread.h
-+++ /dev/null
-@@ -1,26 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_UTIL_THREAD_H__
--#define RE2_UTIL_THREAD_H__
--
--#include <pthread.h>
--
--class Thread {
-- public:
-- Thread();
-- virtual ~Thread();
-- void Start();
-- void Join();
-- void SetJoinable(bool);
-- virtual void Run() = 0;
--
-- private:
-- pthread_t pid_;
-- bool running_;
-- bool joinable_;
--};
--
--#endif // RE2_UTIL_THREAD_H__
--
-diff --git a/re2/util/utf.h b/re2/util/utf.h
-deleted file mode 100644
-index 06ff8f0..0000000
---- a/re2/util/utf.h
-+++ /dev/null
-@@ -1,43 +0,0 @@
--/*
-- * The authors of this software are Rob Pike and Ken Thompson.
-- * Copyright (c) 2002 by Lucent Technologies.
-- * Permission to use, copy, modify, and distribute this software for any
-- * purpose without fee is hereby granted, provided that this entire notice
-- * is included in all copies of any software which is or includes a copy
-- * or modification of this software and in all copies of the supporting
-- * documentation for such software.
-- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
-- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
-- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
-- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
-- *
-- * This file and rune.cc have been converted to compile as C++ code
-- * in name space re2.
-- */
--#ifndef RE2_UTIL_UTF_H__
--#define RE2_UTIL_UTF_H__
--
--#include <stdint.h>
--
--namespace re2 {
--
--typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/
--
--enum
--{
-- UTFmax = 4, /* maximum bytes per rune */
-- Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
-- Runeself = 0x80, /* rune and UTF sequences are the same (<) */
-- Runeerror = 0xFFFD, /* decoding error in UTF */
-- Runemax = 0x10FFFF, /* maximum rune value */
--};
--
--int runetochar(char* s, const Rune* r);
--int chartorune(Rune* r, const char* s);
--int fullrune(const char* s, int n);
--int utflen(const char* s);
--char* utfrune(const char*, Rune);
--
--} // namespace re2
--
--#endif // RE2_UTIL_UTF_H__
-diff --git a/re2/util/util.h b/re2/util/util.h
-deleted file mode 100644
-index 0859658..0000000
---- a/re2/util/util.h
-+++ /dev/null
-@@ -1,127 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#ifndef RE2_UTIL_UTIL_H__
--#define RE2_UTIL_UTIL_H__
--
--// C
--#include <stdio.h>
--#include <string.h>
--#include <stdint.h>
--#include <stddef.h> // For size_t
--#include <assert.h>
--#include <stdarg.h>
--#include <sys/time.h>
--#include <time.h>
--
--// C++
--#include <vector>
--#include <string>
--#include <algorithm>
--#include <iosfwd>
--#include <map>
--#include <stack>
--#include <iostream>
--#include <utility>
--#include <set>
--
--// Use std names.
--using std::set;
--using std::pair;
--using std::vector;
--using std::string;
--using std::min;
--using std::max;
--using std::ostream;
--using std::map;
--using std::stack;
--using std::sort;
--using std::swap;
--using std::make_pair;
--
--#if defined(__GNUC__) && !defined(USE_CXX0X)
--
--#include <tr1/unordered_set>
--using std::tr1::unordered_set;
--
--#else
--
--#include <unordered_set>
--using std::unordered_set;
--
--#endif
--
--namespace re2 {
--
--typedef int8_t int8;
--typedef uint8_t uint8;
--typedef int16_t int16;
--typedef uint16_t uint16;
--typedef int32_t int32;
--typedef uint32_t uint32;
--typedef int64_t int64;
--typedef uint64_t uint64;
--
--typedef unsigned long ulong;
--typedef unsigned int uint;
--typedef unsigned short ushort;
--
--// COMPILE_ASSERT causes a compile error about msg if expr is not true.
--template<bool> struct CompileAssert {};
--#define COMPILE_ASSERT(expr, msg) \
-- typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
--
--// DISALLOW_EVIL_CONSTRUCTORS disallows the copy and operator= functions.
--// It goes in the private: declarations in a class.
--#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \
-- TypeName(const TypeName&); \
-- void operator=(const TypeName&)
--
--#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
--
--// Fake lock annotations. For real ones, see
--// http://code.google.com/p/data-race-test/
--#define ANNOTATE_PUBLISH_MEMORY_RANGE(a, b)
--#define ANNOTATE_IGNORE_WRITES_BEGIN()
--#define ANNOTATE_IGNORE_WRITES_END()
--#define ANNOTATE_BENIGN_RACE(a, b)
--#define NO_THREAD_SAFETY_ANALYSIS
--#define ANNOTATE_HAPPENS_BEFORE(x)
--#define ANNOTATE_HAPPENS_AFTER(x)
--
--class StringPiece;
--
--string CEscape(const StringPiece& src);
--int CEscapeString(const char* src, int src_len, char* dest, int dest_len);
--
--extern string StringPrintf(const char* format, ...);
--extern void SStringPrintf(string* dst, const char* format, ...);
--extern void StringAppendF(string* dst, const char* format, ...);
--extern string PrefixSuccessor(const StringPiece& prefix);
--
--uint32 hashword(const uint32*, size_t, uint32);
--void hashword2(const uint32*, size_t, uint32*, uint32*);
--
--static inline uint32 Hash32StringWithSeed(const char* s, int len, uint32 seed) {
-- return hashword((uint32*)s, len/4, seed);
--}
--
--static inline uint64 Hash64StringWithSeed(const char* s, int len, uint32 seed) {
-- uint32 x, y;
-- x = seed;
-- y = 0;
-- hashword2((uint32*)s, len/4, &x, &y);
-- return ((uint64)x << 32) | y;
--}
--
--int RunningOnValgrind();
--
--} // namespace re2
--
--#include "util/arena.h"
--#include "util/logging.h"
--#include "util/mutex.h"
--#include "util/utf.h"
--
--#endif // RE2_UTIL_UTIL_H__
-diff --git a/re2/util/valgrind.cc b/re2/util/valgrind.cc
-deleted file mode 100644
-index 749bb59..0000000
---- a/re2/util/valgrind.cc
-+++ /dev/null
-@@ -1,24 +0,0 @@
--// Copyright 2009 The RE2 Authors. All Rights Reserved.
--// Use of this source code is governed by a BSD-style
--// license that can be found in the LICENSE file.
--
--#include "util/util.h"
--#include "util/valgrind.h"
--
--namespace re2 {
--
--static bool checkValgrind() {
--#ifdef RUNNING_ON_VALGRIND
-- return RUNNING_ON_VALGRIND;
--#else
-- return false;
--#endif
--}
--
--static const int valgrind = checkValgrind();
--
--int RunningOnValgrind() {
-- return valgrind;
--}
--
--} // namespace re2
-diff --git a/re2/util/valgrind.h b/re2/util/valgrind.h
-deleted file mode 100644
-index ca10b1a..0000000
---- a/re2/util/valgrind.h
-+++ /dev/null
-@@ -1,4517 +0,0 @@
--/* -*- c -*-
-- ----------------------------------------------------------------
--
-- Notice that the following BSD-style license applies to this one
-- file (valgrind.h) only. The rest of Valgrind is licensed under the
-- terms of the GNU General Public License, version 2, unless
-- otherwise indicated. See the COPYING file in the source
-- distribution for details.
--
-- ----------------------------------------------------------------
--
-- This file is part of Valgrind, a dynamic binary instrumentation
-- framework.
--
-- Copyright (C) 2000-2009 Julian Seward. All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
-- 1. Redistributions of source code must retain the above copyright
-- notice, this list of conditions and the following disclaimer.
--
-- 2. The origin of this software must not be misrepresented; you must
-- not claim that you wrote the original software. If you use this
-- software in a product, an acknowledgment in the product
-- documentation would be appreciated but is not required.
--
-- 3. Altered source versions must be plainly marked as such, and must
-- not be misrepresented as being the original software.
--
-- 4. The name of the author may not be used to endorse or promote
-- products derived from this software without specific prior written
-- permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
-- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
-- ----------------------------------------------------------------
--
-- Notice that the above BSD-style license applies to this one file
-- (valgrind.h) only. The entire rest of Valgrind is licensed under
-- the terms of the GNU General Public License, version 2. See the
-- COPYING file in the source distribution for details.
--
-- ----------------------------------------------------------------
--*/
--
--
--/* This file is for inclusion into client (your!) code.
--
-- You can use these macros to manipulate and query Valgrind's
-- execution inside your own programs.
--
-- The resulting executables will still run without Valgrind, just a
-- little bit more slowly than they otherwise would, but otherwise
-- unchanged. When not running on valgrind, each client request
-- consumes very few (eg. 7) instructions, so the resulting performance
-- loss is negligible unless you plan to execute client requests
-- millions of times per second. Nevertheless, if that is still a
-- problem, you can compile with the NVALGRIND symbol defined (gcc
-- -DNVALGRIND) so that client requests are not even compiled in. */
--
--#ifndef __VALGRIND_H
--#define __VALGRIND_H
--
--#include <stdarg.h>
--
--/* Nb: this file might be included in a file compiled with -ansi. So
-- we can't use C++ style "//" comments nor the "asm" keyword (instead
-- use "__asm__"). */
--
--/* Derive some tags indicating what the target platform is. Note
-- that in this file we're using the compiler's CPP symbols for
-- identifying architectures, which are different to the ones we use
-- within the rest of Valgrind. Note, __powerpc__ is active for both
-- 32 and 64-bit PPC, whereas __powerpc64__ is only active for the
-- latter (on Linux, that is).
--
-- Misc note: how to find out what's predefined in gcc by default:
-- gcc -Wp,-dM somefile.c
--*/
--#undef PLAT_ppc64_aix5
--#undef PLAT_ppc32_aix5
--#undef PLAT_x86_darwin
--#undef PLAT_amd64_darwin
--#undef PLAT_x86_linux
--#undef PLAT_amd64_linux
--#undef PLAT_ppc32_linux
--#undef PLAT_ppc64_linux
--#undef PLAT_arm_linux
--
--#if defined(_AIX) && defined(__64BIT__)
--# define PLAT_ppc64_aix5 1
--#elif defined(_AIX) && !defined(__64BIT__)
--# define PLAT_ppc32_aix5 1
--#elif defined(__APPLE__) && defined(__i386__)
--# define PLAT_x86_darwin 1
--#elif defined(__APPLE__) && defined(__x86_64__)
--# define PLAT_amd64_darwin 1
--#elif defined(__linux__) && defined(__i386__)
--# define PLAT_x86_linux 1
--#elif defined(__linux__) && defined(__x86_64__)
--# define PLAT_amd64_linux 1
--#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__)
--# define PLAT_ppc32_linux 1
--#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__)
--# define PLAT_ppc64_linux 1
--#elif defined(__linux__) && defined(__arm__)
--# define PLAT_arm_linux 1
--#else
--/* If we're not compiling for our target platform, don't generate
-- any inline asms. */
--# if !defined(NVALGRIND)
--# define NVALGRIND 1
--# endif
--#endif
--
--
--/* ------------------------------------------------------------------ */
--/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */
--/* in here of use to end-users -- skip to the next section. */
--/* ------------------------------------------------------------------ */
--
--#if defined(NVALGRIND)
--
--/* Define NVALGRIND to completely remove the Valgrind magic sequence
-- from the compiled code (analogous to NDEBUG's effects on
-- assert()) */
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- { \
-- (_zzq_rlval) = (_zzq_default); \
-- }
--
--#else /* ! NVALGRIND */
--
--/* The following defines the magic code sequences which the JITter
-- spots and handles magically. Don't look too closely at them as
-- they will rot your brain.
--
-- The assembly code sequences for all architectures is in this one
-- file. This is because this file must be stand-alone, and we don't
-- want to have multiple files.
--
-- For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
-- value gets put in the return slot, so that everything works when
-- this is executed not under Valgrind. Args are passed in a memory
-- block, and so there's no intrinsic limit to the number that could
-- be passed, but it's currently five.
--
-- The macro args are:
-- _zzq_rlval result lvalue
-- _zzq_default default value (result returned when running on real CPU)
-- _zzq_request request code
-- _zzq_arg1..5 request params
--
-- The other two macros are used to support function wrapping, and are
-- a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the
-- guest's NRADDR pseudo-register and whatever other information is
-- needed to safely run the call original from the wrapper: on
-- ppc64-linux, the R2 value at the divert point is also needed. This
-- information is abstracted into a user-visible type, OrigFn.
--
-- VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
-- guest, but guarantees that the branch instruction will not be
-- redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
-- branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a
-- complete inline asm, since it needs to be combined with more magic
-- inline asm stuff to be useful.
--*/
--
--/* ------------------------- x86-{linux,darwin} ---------------- */
--
--#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin)
--
--typedef
-- struct {
-- unsigned int nraddr; /* where's the code? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "roll $3, %%edi ; roll $13, %%edi\n\t" \
-- "roll $29, %%edi ; roll $19, %%edi\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- { volatile unsigned int _zzq_args[6]; \
-- volatile unsigned int _zzq_result; \
-- _zzq_args[0] = (unsigned int)(_zzq_request); \
-- _zzq_args[1] = (unsigned int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned int)(_zzq_arg5); \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %EDX = client_request ( %EAX ) */ \
-- "xchgl %%ebx,%%ebx" \
-- : "=d" (_zzq_result) \
-- : "a" (&_zzq_args[0]), "0" (_zzq_default) \
-- : "cc", "memory" \
-- ); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- volatile unsigned int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %EAX = guest_NRADDR */ \
-- "xchgl %%ecx,%%ecx" \
-- : "=a" (__addr) \
-- : \
-- : "cc", "memory" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- }
--
--#define VALGRIND_CALL_NOREDIR_EAX \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* call-noredir *%EAX */ \
-- "xchgl %%edx,%%edx\n\t"
--#endif /* PLAT_x86_linux || PLAT_x86_darwin */
--
--/* ------------------------ amd64-{linux,darwin} --------------- */
--
--#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin)
--
--typedef
-- struct {
-- unsigned long long int nraddr; /* where's the code? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \
-- "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- { volatile unsigned long long int _zzq_args[6]; \
-- volatile unsigned long long int _zzq_result; \
-- _zzq_args[0] = (unsigned long long int)(_zzq_request); \
-- _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %RDX = client_request ( %RAX ) */ \
-- "xchgq %%rbx,%%rbx" \
-- : "=d" (_zzq_result) \
-- : "a" (&_zzq_args[0]), "0" (_zzq_default) \
-- : "cc", "memory" \
-- ); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- volatile unsigned long long int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %RAX = guest_NRADDR */ \
-- "xchgq %%rcx,%%rcx" \
-- : "=a" (__addr) \
-- : \
-- : "cc", "memory" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- }
--
--#define VALGRIND_CALL_NOREDIR_RAX \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* call-noredir *%RAX */ \
-- "xchgq %%rdx,%%rdx\n\t"
--#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
--
--/* ------------------------ ppc32-linux ------------------------ */
--
--#if defined(PLAT_ppc32_linux)
--
--typedef
-- struct {
-- unsigned int nraddr; /* where's the code? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
-- "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- \
-- { unsigned int _zzq_args[6]; \
-- unsigned int _zzq_result; \
-- unsigned int* _zzq_ptr; \
-- _zzq_args[0] = (unsigned int)(_zzq_request); \
-- _zzq_args[1] = (unsigned int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned int)(_zzq_arg5); \
-- _zzq_ptr = _zzq_args; \
-- __asm__ volatile("mr 3,%1\n\t" /*default*/ \
-- "mr 4,%2\n\t" /*ptr*/ \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = client_request ( %R4 ) */ \
-- "or 1,1,1\n\t" \
-- "mr %0,3" /*result*/ \
-- : "=b" (_zzq_result) \
-- : "b" (_zzq_default), "b" (_zzq_ptr) \
-- : "cc", "memory", "r3", "r4"); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- unsigned int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR */ \
-- "or 2,2,2\n\t" \
-- "mr %0,3" \
-- : "=b" (__addr) \
-- : \
-- : "cc", "memory", "r3" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- }
--
--#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* branch-and-link-to-noredir *%R11 */ \
-- "or 3,3,3\n\t"
--#endif /* PLAT_ppc32_linux */
--
--/* ------------------------ ppc64-linux ------------------------ */
--
--#if defined(PLAT_ppc64_linux)
--
--typedef
-- struct {
-- unsigned long long int nraddr; /* where's the code? */
-- unsigned long long int r2; /* what tocptr do we need? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
-- "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- \
-- { unsigned long long int _zzq_args[6]; \
-- register unsigned long long int _zzq_result __asm__("r3"); \
-- register unsigned long long int* _zzq_ptr __asm__("r4"); \
-- _zzq_args[0] = (unsigned long long int)(_zzq_request); \
-- _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
-- _zzq_ptr = _zzq_args; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = client_request ( %R4 ) */ \
-- "or 1,1,1" \
-- : "=r" (_zzq_result) \
-- : "0" (_zzq_default), "r" (_zzq_ptr) \
-- : "cc", "memory"); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- register unsigned long long int __addr __asm__("r3"); \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR */ \
-- "or 2,2,2" \
-- : "=r" (__addr) \
-- : \
-- : "cc", "memory" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR_GPR2 */ \
-- "or 4,4,4" \
-- : "=r" (__addr) \
-- : \
-- : "cc", "memory" \
-- ); \
-- _zzq_orig->r2 = __addr; \
-- }
--
--#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* branch-and-link-to-noredir *%R11 */ \
-- "or 3,3,3\n\t"
--
--#endif /* PLAT_ppc64_linux */
--
--/* ------------------------- arm-linux ------------------------- */
--
--#if defined(PLAT_arm_linux)
--
--typedef
-- struct {
-- unsigned int nraddr; /* where's the code? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \
-- "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- \
-- { volatile unsigned int _zzq_args[6]; \
-- volatile unsigned int _zzq_result; \
-- _zzq_args[0] = (unsigned int)(_zzq_request); \
-- _zzq_args[1] = (unsigned int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned int)(_zzq_arg5); \
-- __asm__ volatile("mov r3, %1\n\t" /*default*/ \
-- "mov r4, %2\n\t" /*ptr*/ \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* R3 = client_request ( R4 ) */ \
-- "orr r10, r10, r10\n\t" \
-- "mov %0, r3" /*result*/ \
-- : "=r" (_zzq_result) \
-- : "r" (_zzq_default), "r" (&_zzq_args[0]) \
-- : "cc","memory", "r3", "r4"); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- unsigned int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* R3 = guest_NRADDR */ \
-- "orr r11, r11, r11\n\t" \
-- "mov %0, r3" \
-- : "=r" (__addr) \
-- : \
-- : "cc", "memory", "r3" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- }
--
--#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* branch-and-link-to-noredir *%R4 */ \
-- "orr r12, r12, r12\n\t"
--
--#endif /* PLAT_arm_linux */
--
--/* ------------------------ ppc32-aix5 ------------------------- */
--
--#if defined(PLAT_ppc32_aix5)
--
--typedef
-- struct {
-- unsigned int nraddr; /* where's the code? */
-- unsigned int r2; /* what tocptr do we need? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
-- "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- \
-- { unsigned int _zzq_args[7]; \
-- register unsigned int _zzq_result; \
-- register unsigned int* _zzq_ptr; \
-- _zzq_args[0] = (unsigned int)(_zzq_request); \
-- _zzq_args[1] = (unsigned int)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned int)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned int)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned int)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned int)(_zzq_arg5); \
-- _zzq_args[6] = (unsigned int)(_zzq_default); \
-- _zzq_ptr = _zzq_args; \
-- __asm__ volatile("mr 4,%1\n\t" \
-- "lwz 3, 24(4)\n\t" \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = client_request ( %R4 ) */ \
-- "or 1,1,1\n\t" \
-- "mr %0,3" \
-- : "=b" (_zzq_result) \
-- : "b" (_zzq_ptr) \
-- : "r3", "r4", "cc", "memory"); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- register unsigned int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR */ \
-- "or 2,2,2\n\t" \
-- "mr %0,3" \
-- : "=b" (__addr) \
-- : \
-- : "r3", "cc", "memory" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR_GPR2 */ \
-- "or 4,4,4\n\t" \
-- "mr %0,3" \
-- : "=b" (__addr) \
-- : \
-- : "r3", "cc", "memory" \
-- ); \
-- _zzq_orig->r2 = __addr; \
-- }
--
--#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* branch-and-link-to-noredir *%R11 */ \
-- "or 3,3,3\n\t"
--
--#endif /* PLAT_ppc32_aix5 */
--
--/* ------------------------ ppc64-aix5 ------------------------- */
--
--#if defined(PLAT_ppc64_aix5)
--
--typedef
-- struct {
-- unsigned long long int nraddr; /* where's the code? */
-- unsigned long long int r2; /* what tocptr do we need? */
-- }
-- OrigFn;
--
--#define __SPECIAL_INSTRUCTION_PREAMBLE \
-- "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
-- "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
--
--#define VALGRIND_DO_CLIENT_REQUEST( \
-- _zzq_rlval, _zzq_default, _zzq_request, \
-- _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
-- \
-- { unsigned long long int _zzq_args[7]; \
-- register unsigned long long int _zzq_result; \
-- register unsigned long long int* _zzq_ptr; \
-- _zzq_args[0] = (unsigned int long long)(_zzq_request); \
-- _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \
-- _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \
-- _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \
-- _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \
-- _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \
-- _zzq_args[6] = (unsigned int long long)(_zzq_default); \
-- _zzq_ptr = _zzq_args; \
-- __asm__ volatile("mr 4,%1\n\t" \
-- "ld 3, 48(4)\n\t" \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = client_request ( %R4 ) */ \
-- "or 1,1,1\n\t" \
-- "mr %0,3" \
-- : "=b" (_zzq_result) \
-- : "b" (_zzq_ptr) \
-- : "r3", "r4", "cc", "memory"); \
-- _zzq_rlval = _zzq_result; \
-- }
--
--#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
-- { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
-- register unsigned long long int __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR */ \
-- "or 2,2,2\n\t" \
-- "mr %0,3" \
-- : "=b" (__addr) \
-- : \
-- : "r3", "cc", "memory" \
-- ); \
-- _zzq_orig->nraddr = __addr; \
-- __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
-- /* %R3 = guest_NRADDR_GPR2 */ \
-- "or 4,4,4\n\t" \
-- "mr %0,3" \
-- : "=b" (__addr) \
-- : \
-- : "r3", "cc", "memory" \
-- ); \
-- _zzq_orig->r2 = __addr; \
-- }
--
--#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- __SPECIAL_INSTRUCTION_PREAMBLE \
-- /* branch-and-link-to-noredir *%R11 */ \
-- "or 3,3,3\n\t"
--
--#endif /* PLAT_ppc64_aix5 */
--
--/* Insert assembly code for other platforms here... */
--
--#endif /* NVALGRIND */
--
--
--/* ------------------------------------------------------------------ */
--/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */
--/* ugly. It's the least-worst tradeoff I can think of. */
--/* ------------------------------------------------------------------ */
--
--/* This section defines magic (a.k.a appalling-hack) macros for doing
-- guaranteed-no-redirection macros, so as to get from function
-- wrappers to the functions they are wrapping. The whole point is to
-- construct standard call sequences, but to do the call itself with a
-- special no-redirect call pseudo-instruction that the JIT
-- understands and handles specially. This section is long and
-- repetitious, and I can't see a way to make it shorter.
--
-- The naming scheme is as follows:
--
-- CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
--
-- 'W' stands for "word" and 'v' for "void". Hence there are
-- different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
-- and for each, the possibility of returning a word-typed result, or
-- no result.
--*/
--
--/* Use these to write the name of your wrapper. NOTE: duplicates
-- VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
--
--/* Use an extra level of macroisation so as to ensure the soname/fnname
-- args are fully macro-expanded before pasting them together. */
--#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd
--
--#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \
-- VG_CONCAT4(_vgwZU_,soname,_,fnname)
--
--#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \
-- VG_CONCAT4(_vgwZZ_,soname,_,fnname)
--
--/* Use this macro from within a wrapper function to collect the
-- context (address and possibly other info) of the original function.
-- Once you have that you can then use it in one of the CALL_FN_
-- macros. The type of the argument _lval is OrigFn. */
--#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval)
--
--/* Derivatives of the main macros below, for calling functions
-- returning void. */
--
--#define CALL_FN_v_v(fnptr) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_v(_junk,fnptr); } while (0)
--
--#define CALL_FN_v_W(fnptr, arg1) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
--
--#define CALL_FN_v_WW(fnptr, arg1,arg2) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
--
--#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
--
--#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0)
--
--#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0)
--
--#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0)
--
--#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \
-- do { volatile unsigned long _junk; \
-- CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0)
--
--/* ------------------------- x86-{linux,darwin} ---------------- */
--
--#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin)
--
--/* These regs are trashed by the hidden call. No need to mention eax
-- as gcc can already see that, plus causes gcc to bomb. */
--#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
--
--/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
-- long) == 4. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[1]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[2]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- __asm__ volatile( \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $4, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- __asm__ volatile( \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $8, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[4]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- __asm__ volatile( \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $12, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[5]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- __asm__ volatile( \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $16, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[6]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- __asm__ volatile( \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $20, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[7]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- __asm__ volatile( \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $24, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[8]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- __asm__ volatile( \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $28, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[9]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- __asm__ volatile( \
-- "pushl 32(%%eax)\n\t" \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $32, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[10]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- __asm__ volatile( \
-- "pushl 36(%%eax)\n\t" \
-- "pushl 32(%%eax)\n\t" \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $36, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[11]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- __asm__ volatile( \
-- "pushl 40(%%eax)\n\t" \
-- "pushl 36(%%eax)\n\t" \
-- "pushl 32(%%eax)\n\t" \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $40, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
-- arg6,arg7,arg8,arg9,arg10, \
-- arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[12]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- __asm__ volatile( \
-- "pushl 44(%%eax)\n\t" \
-- "pushl 40(%%eax)\n\t" \
-- "pushl 36(%%eax)\n\t" \
-- "pushl 32(%%eax)\n\t" \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $44, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
-- arg6,arg7,arg8,arg9,arg10, \
-- arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[13]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- _argvec[12] = (unsigned long)(arg12); \
-- __asm__ volatile( \
-- "pushl 48(%%eax)\n\t" \
-- "pushl 44(%%eax)\n\t" \
-- "pushl 40(%%eax)\n\t" \
-- "pushl 36(%%eax)\n\t" \
-- "pushl 32(%%eax)\n\t" \
-- "pushl 28(%%eax)\n\t" \
-- "pushl 24(%%eax)\n\t" \
-- "pushl 20(%%eax)\n\t" \
-- "pushl 16(%%eax)\n\t" \
-- "pushl 12(%%eax)\n\t" \
-- "pushl 8(%%eax)\n\t" \
-- "pushl 4(%%eax)\n\t" \
-- "movl (%%eax), %%eax\n\t" /* target->%eax */ \
-- VALGRIND_CALL_NOREDIR_EAX \
-- "addl $48, %%esp\n" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_x86_linux || PLAT_x86_darwin */
--
--/* ------------------------ amd64-{linux,darwin} --------------- */
--
--#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin)
--
--/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \
-- "rdi", "r8", "r9", "r10", "r11"
--
--/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
-- long) == 8. */
--
--/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_
-- macros. In order not to trash the stack redzone, we need to drop
-- %rsp by 128 before the hidden call, and restore afterwards. The
-- nastyness is that it is only by luck that the stack still appears
-- to be unwindable during the hidden call - since then the behaviour
-- of any routine using this macro does not match what the CFI data
-- says. Sigh.
--
-- Why is this important? Imagine that a wrapper has a stack
-- allocated local, and passes to the hidden call, a pointer to it.
-- Because gcc does not know about the hidden call, it may allocate
-- that local in the redzone. Unfortunately the hidden call may then
-- trash it before it comes to use it. So we must step clear of the
-- redzone, for the duration of the hidden call, to make it safe.
--
-- Probably the same problem afflicts the other redzone-style ABIs too
-- (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
-- self describing (none of this CFI nonsense) so at least messing
-- with the stack pointer doesn't give a danger of non-unwindable
-- stack. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[1]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[2]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[4]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[5]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[6]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[7]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[8]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $8, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[9]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 64(%%rax)\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $16, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[10]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 72(%%rax)\n\t" \
-- "pushq 64(%%rax)\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $24, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[11]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 80(%%rax)\n\t" \
-- "pushq 72(%%rax)\n\t" \
-- "pushq 64(%%rax)\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $32, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[12]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 88(%%rax)\n\t" \
-- "pushq 80(%%rax)\n\t" \
-- "pushq 72(%%rax)\n\t" \
-- "pushq 64(%%rax)\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $40, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[13]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- _argvec[12] = (unsigned long)(arg12); \
-- __asm__ volatile( \
-- "subq $128,%%rsp\n\t" \
-- "pushq 96(%%rax)\n\t" \
-- "pushq 88(%%rax)\n\t" \
-- "pushq 80(%%rax)\n\t" \
-- "pushq 72(%%rax)\n\t" \
-- "pushq 64(%%rax)\n\t" \
-- "pushq 56(%%rax)\n\t" \
-- "movq 48(%%rax), %%r9\n\t" \
-- "movq 40(%%rax), %%r8\n\t" \
-- "movq 32(%%rax), %%rcx\n\t" \
-- "movq 24(%%rax), %%rdx\n\t" \
-- "movq 16(%%rax), %%rsi\n\t" \
-- "movq 8(%%rax), %%rdi\n\t" \
-- "movq (%%rax), %%rax\n\t" /* target->%rax */ \
-- VALGRIND_CALL_NOREDIR_RAX \
-- "addq $48, %%rsp\n" \
-- "addq $128,%%rsp\n\t" \
-- : /*out*/ "=a" (_res) \
-- : /*in*/ "a" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
--
--/* ------------------------ ppc32-linux ------------------------ */
--
--#if defined(PLAT_ppc32_linux)
--
--/* This is useful for finding out about the on-stack stuff:
--
-- extern int f9 ( int,int,int,int,int,int,int,int,int );
-- extern int f10 ( int,int,int,int,int,int,int,int,int,int );
-- extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
-- extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
--
-- int g9 ( void ) {
-- return f9(11,22,33,44,55,66,77,88,99);
-- }
-- int g10 ( void ) {
-- return f10(11,22,33,44,55,66,77,88,99,110);
-- }
-- int g11 ( void ) {
-- return f11(11,22,33,44,55,66,77,88,99,110,121);
-- }
-- int g12 ( void ) {
-- return f12(11,22,33,44,55,66,77,88,99,110,121,132);
-- }
--*/
--
--/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS \
-- "lr", "ctr", "xer", \
-- "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
-- "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
-- "r11", "r12", "r13"
--
--/* These CALL_FN_ macros assume that on ppc32-linux,
-- sizeof(unsigned long) == 4. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[1]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[2]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[4]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[5]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[6]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[7]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[8]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[9]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- _argvec[8] = (unsigned long)arg8; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 10,32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[10]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- _argvec[8] = (unsigned long)arg8; \
-- _argvec[9] = (unsigned long)arg9; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "addi 1,1,-16\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,8(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 10,32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "addi 1,1,16\n\t" \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[11]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- _argvec[8] = (unsigned long)arg8; \
-- _argvec[9] = (unsigned long)arg9; \
-- _argvec[10] = (unsigned long)arg10; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "addi 1,1,-16\n\t" \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,12(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,8(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 10,32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "addi 1,1,16\n\t" \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[12]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- _argvec[8] = (unsigned long)arg8; \
-- _argvec[9] = (unsigned long)arg9; \
-- _argvec[10] = (unsigned long)arg10; \
-- _argvec[11] = (unsigned long)arg11; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "addi 1,1,-32\n\t" \
-- /* arg11 */ \
-- "lwz 3,44(11)\n\t" \
-- "stw 3,16(1)\n\t" \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,12(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,8(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 10,32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "addi 1,1,32\n\t" \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[13]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)arg1; \
-- _argvec[2] = (unsigned long)arg2; \
-- _argvec[3] = (unsigned long)arg3; \
-- _argvec[4] = (unsigned long)arg4; \
-- _argvec[5] = (unsigned long)arg5; \
-- _argvec[6] = (unsigned long)arg6; \
-- _argvec[7] = (unsigned long)arg7; \
-- _argvec[8] = (unsigned long)arg8; \
-- _argvec[9] = (unsigned long)arg9; \
-- _argvec[10] = (unsigned long)arg10; \
-- _argvec[11] = (unsigned long)arg11; \
-- _argvec[12] = (unsigned long)arg12; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "addi 1,1,-32\n\t" \
-- /* arg12 */ \
-- "lwz 3,48(11)\n\t" \
-- "stw 3,20(1)\n\t" \
-- /* arg11 */ \
-- "lwz 3,44(11)\n\t" \
-- "stw 3,16(1)\n\t" \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,12(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,8(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3,4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4,8(11)\n\t" \
-- "lwz 5,12(11)\n\t" \
-- "lwz 6,16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7,20(11)\n\t" \
-- "lwz 8,24(11)\n\t" \
-- "lwz 9,28(11)\n\t" \
-- "lwz 10,32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11,0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "addi 1,1,32\n\t" \
-- "mr %0,3" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_ppc32_linux */
--
--/* ------------------------ ppc64-linux ------------------------ */
--
--#if defined(PLAT_ppc64_linux)
--
--/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS \
-- "lr", "ctr", "xer", \
-- "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
-- "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
-- "r11", "r12", "r13"
--
--/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
-- long) == 8. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+0]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+1]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+2]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+3]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+4]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+5]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+6]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+7]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+8]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)" /* restore tocptr */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+9]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "addi 1,1,-128\n\t" /* expand stack frame */ \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- "addi 1,1,128" /* restore frame */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+10]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "addi 1,1,-128\n\t" /* expand stack frame */ \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- "addi 1,1,128" /* restore frame */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+11]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "addi 1,1,-144\n\t" /* expand stack frame */ \
-- /* arg11 */ \
-- "ld 3,88(11)\n\t" \
-- "std 3,128(1)\n\t" \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- "addi 1,1,144" /* restore frame */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+12]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- _argvec[2+12] = (unsigned long)arg12; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "addi 1,1,-144\n\t" /* expand stack frame */ \
-- /* arg12 */ \
-- "ld 3,96(11)\n\t" \
-- "std 3,136(1)\n\t" \
-- /* arg11 */ \
-- "ld 3,88(11)\n\t" \
-- "std 3,128(1)\n\t" \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- "addi 1,1,144" /* restore frame */ \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_ppc64_linux */
--
--/* ------------------------- arm-linux ------------------------- */
--
--#if defined(PLAT_arm_linux)
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14"
--
--/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned
-- long) == 4. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[1]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "mov %0, r0\n" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[2]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "mov %0, r0\n" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "mov %0, r0\n" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[4]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "mov %0, r0\n" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[5]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[6]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #20] \n\t" \
-- "push {r0} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #4 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[7]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "push {r0, r1} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #8 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[8]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "push {r0, r1, r2} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #12 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[9]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "ldr r3, [%1, #32] \n\t" \
-- "push {r0, r1, r2, r3} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #16 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[10]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "ldr r3, [%1, #32] \n\t" \
-- "ldr r4, [%1, #36] \n\t" \
-- "push {r0, r1, r2, r3, r4} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #20 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[11]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #40] \n\t" \
-- "push {r0} \n\t" \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "ldr r3, [%1, #32] \n\t" \
-- "ldr r4, [%1, #36] \n\t" \
-- "push {r0, r1, r2, r3, r4} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #24 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
-- arg6,arg7,arg8,arg9,arg10, \
-- arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[12]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #40] \n\t" \
-- "ldr r1, [%1, #44] \n\t" \
-- "push {r0, r1} \n\t" \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "ldr r3, [%1, #32] \n\t" \
-- "ldr r4, [%1, #36] \n\t" \
-- "push {r0, r1, r2, r3, r4} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #28 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
-- arg6,arg7,arg8,arg9,arg10, \
-- arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[13]; \
-- volatile unsigned long _res; \
-- _argvec[0] = (unsigned long)_orig.nraddr; \
-- _argvec[1] = (unsigned long)(arg1); \
-- _argvec[2] = (unsigned long)(arg2); \
-- _argvec[3] = (unsigned long)(arg3); \
-- _argvec[4] = (unsigned long)(arg4); \
-- _argvec[5] = (unsigned long)(arg5); \
-- _argvec[6] = (unsigned long)(arg6); \
-- _argvec[7] = (unsigned long)(arg7); \
-- _argvec[8] = (unsigned long)(arg8); \
-- _argvec[9] = (unsigned long)(arg9); \
-- _argvec[10] = (unsigned long)(arg10); \
-- _argvec[11] = (unsigned long)(arg11); \
-- _argvec[12] = (unsigned long)(arg12); \
-- __asm__ volatile( \
-- "ldr r0, [%1, #40] \n\t" \
-- "ldr r1, [%1, #44] \n\t" \
-- "ldr r2, [%1, #48] \n\t" \
-- "push {r0, r1, r2} \n\t" \
-- "ldr r0, [%1, #20] \n\t" \
-- "ldr r1, [%1, #24] \n\t" \
-- "ldr r2, [%1, #28] \n\t" \
-- "ldr r3, [%1, #32] \n\t" \
-- "ldr r4, [%1, #36] \n\t" \
-- "push {r0, r1, r2, r3, r4} \n\t" \
-- "ldr r0, [%1, #4] \n\t" \
-- "ldr r1, [%1, #8] \n\t" \
-- "ldr r2, [%1, #12] \n\t" \
-- "ldr r3, [%1, #16] \n\t" \
-- "ldr r4, [%1] \n\t" /* target->r4 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \
-- "add sp, sp, #32 \n\t" \
-- "mov %0, r0" \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "0" (&_argvec[0]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_arm_linux */
--
--/* ------------------------ ppc32-aix5 ------------------------- */
--
--#if defined(PLAT_ppc32_aix5)
--
--/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS \
-- "lr", "ctr", "xer", \
-- "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
-- "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
-- "r11", "r12", "r13"
--
--/* Expand the stack frame, copying enough info that unwinding
-- still works. Trashes r3. */
--
--#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \
-- "addi 1,1,-" #_n_fr "\n\t" \
-- "lwz 3," #_n_fr "(1)\n\t" \
-- "stw 3,0(1)\n\t"
--
--#define VG_CONTRACT_FRAME_BY(_n_fr) \
-- "addi 1,1," #_n_fr "\n\t"
--
--/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
-- long) == 4. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+0]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+1]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+2]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+3]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+4]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+5]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+6]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+7]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+8]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+9]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(64) \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,56(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(64) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+10]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(64) \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,60(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,56(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(64) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+11]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(72) \
-- /* arg11 */ \
-- "lwz 3,44(11)\n\t" \
-- "stw 3,64(1)\n\t" \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,60(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,56(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(72) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+12]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- _argvec[2+12] = (unsigned long)arg12; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "stw 2,-8(11)\n\t" /* save tocptr */ \
-- "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(72) \
-- /* arg12 */ \
-- "lwz 3,48(11)\n\t" \
-- "stw 3,68(1)\n\t" \
-- /* arg11 */ \
-- "lwz 3,44(11)\n\t" \
-- "stw 3,64(1)\n\t" \
-- /* arg10 */ \
-- "lwz 3,40(11)\n\t" \
-- "stw 3,60(1)\n\t" \
-- /* arg9 */ \
-- "lwz 3,36(11)\n\t" \
-- "stw 3,56(1)\n\t" \
-- /* args1-8 */ \
-- "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
-- "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
-- "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
-- "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
-- "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
-- "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
-- "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
-- "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
-- "lwz 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "lwz 2,-8(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(72) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_ppc32_aix5 */
--
--/* ------------------------ ppc64-aix5 ------------------------- */
--
--#if defined(PLAT_ppc64_aix5)
--
--/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
--
--/* These regs are trashed by the hidden call. */
--#define __CALLER_SAVED_REGS \
-- "lr", "ctr", "xer", \
-- "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
-- "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
-- "r11", "r12", "r13"
--
--/* Expand the stack frame, copying enough info that unwinding
-- still works. Trashes r3. */
--
--#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \
-- "addi 1,1,-" #_n_fr "\n\t" \
-- "ld 3," #_n_fr "(1)\n\t" \
-- "std 3,0(1)\n\t"
--
--#define VG_CONTRACT_FRAME_BY(_n_fr) \
-- "addi 1,1," #_n_fr "\n\t"
--
--/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
-- long) == 8. */
--
--#define CALL_FN_W_v(lval, orig) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+0]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_W(lval, orig, arg1) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+1]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+2]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+3]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+4]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+5]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+6]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+7]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+8]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+9]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(128) \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(128) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+10]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(128) \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(128) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+11]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(144) \
-- /* arg11 */ \
-- "ld 3,88(11)\n\t" \
-- "std 3,128(1)\n\t" \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(144) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
-- arg7,arg8,arg9,arg10,arg11,arg12) \
-- do { \
-- volatile OrigFn _orig = (orig); \
-- volatile unsigned long _argvec[3+12]; \
-- volatile unsigned long _res; \
-- /* _argvec[0] holds current r2 across the call */ \
-- _argvec[1] = (unsigned long)_orig.r2; \
-- _argvec[2] = (unsigned long)_orig.nraddr; \
-- _argvec[2+1] = (unsigned long)arg1; \
-- _argvec[2+2] = (unsigned long)arg2; \
-- _argvec[2+3] = (unsigned long)arg3; \
-- _argvec[2+4] = (unsigned long)arg4; \
-- _argvec[2+5] = (unsigned long)arg5; \
-- _argvec[2+6] = (unsigned long)arg6; \
-- _argvec[2+7] = (unsigned long)arg7; \
-- _argvec[2+8] = (unsigned long)arg8; \
-- _argvec[2+9] = (unsigned long)arg9; \
-- _argvec[2+10] = (unsigned long)arg10; \
-- _argvec[2+11] = (unsigned long)arg11; \
-- _argvec[2+12] = (unsigned long)arg12; \
-- __asm__ volatile( \
-- "mr 11,%1\n\t" \
-- VG_EXPAND_FRAME_BY_trashes_r3(512) \
-- "std 2,-16(11)\n\t" /* save tocptr */ \
-- "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
-- VG_EXPAND_FRAME_BY_trashes_r3(144) \
-- /* arg12 */ \
-- "ld 3,96(11)\n\t" \
-- "std 3,136(1)\n\t" \
-- /* arg11 */ \
-- "ld 3,88(11)\n\t" \
-- "std 3,128(1)\n\t" \
-- /* arg10 */ \
-- "ld 3,80(11)\n\t" \
-- "std 3,120(1)\n\t" \
-- /* arg9 */ \
-- "ld 3,72(11)\n\t" \
-- "std 3,112(1)\n\t" \
-- /* args1-8 */ \
-- "ld 3, 8(11)\n\t" /* arg1->r3 */ \
-- "ld 4, 16(11)\n\t" /* arg2->r4 */ \
-- "ld 5, 24(11)\n\t" /* arg3->r5 */ \
-- "ld 6, 32(11)\n\t" /* arg4->r6 */ \
-- "ld 7, 40(11)\n\t" /* arg5->r7 */ \
-- "ld 8, 48(11)\n\t" /* arg6->r8 */ \
-- "ld 9, 56(11)\n\t" /* arg7->r9 */ \
-- "ld 10, 64(11)\n\t" /* arg8->r10 */ \
-- "ld 11, 0(11)\n\t" /* target->r11 */ \
-- VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
-- "mr 11,%1\n\t" \
-- "mr %0,3\n\t" \
-- "ld 2,-16(11)\n\t" /* restore tocptr */ \
-- VG_CONTRACT_FRAME_BY(144) \
-- VG_CONTRACT_FRAME_BY(512) \
-- : /*out*/ "=r" (_res) \
-- : /*in*/ "r" (&_argvec[2]) \
-- : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
-- ); \
-- lval = (__typeof__(lval)) _res; \
-- } while (0)
--
--#endif /* PLAT_ppc64_aix5 */
--
--
--/* ------------------------------------------------------------------ */
--/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */
--/* */
--/* ------------------------------------------------------------------ */
--
--/* Some request codes. There are many more of these, but most are not
-- exposed to end-user view. These are the public ones, all of the
-- form 0x1000 + small_number.
--
-- Core ones are in the range 0x00000000--0x0000ffff. The non-public
-- ones start at 0x2000.
--*/
--
--/* These macros are used by tools -- they must be public, but don't
-- embed them into other programs. */
--#define VG_USERREQ_TOOL_BASE(a,b) \
-- ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16))
--#define VG_IS_TOOL_USERREQ(a, b, v) \
-- (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
--
--/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
-- This enum comprises an ABI exported by Valgrind to programs
-- which use client requests. DO NOT CHANGE THE ORDER OF THESE
-- ENTRIES, NOR DELETE ANY -- add new ones at the end. */
--typedef
-- enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001,
-- VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002,
--
-- /* These allow any function to be called from the simulated
-- CPU but run on the real CPU. Nb: the first arg passed to
-- the function is always the ThreadId of the running
-- thread! So CLIENT_CALL0 actually requires a 1 arg
-- function, etc. */
-- VG_USERREQ__CLIENT_CALL0 = 0x1101,
-- VG_USERREQ__CLIENT_CALL1 = 0x1102,
-- VG_USERREQ__CLIENT_CALL2 = 0x1103,
-- VG_USERREQ__CLIENT_CALL3 = 0x1104,
--
-- /* Can be useful in regression testing suites -- eg. can
-- send Valgrind's output to /dev/null and still count
-- errors. */
-- VG_USERREQ__COUNT_ERRORS = 0x1201,
--
-- /* These are useful and can be interpreted by any tool that
-- tracks malloc() et al, by using vg_replace_malloc.c. */
-- VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
-- VG_USERREQ__FREELIKE_BLOCK = 0x1302,
-- /* Memory pool support. */
-- VG_USERREQ__CREATE_MEMPOOL = 0x1303,
-- VG_USERREQ__DESTROY_MEMPOOL = 0x1304,
-- VG_USERREQ__MEMPOOL_ALLOC = 0x1305,
-- VG_USERREQ__MEMPOOL_FREE = 0x1306,
-- VG_USERREQ__MEMPOOL_TRIM = 0x1307,
-- VG_USERREQ__MOVE_MEMPOOL = 0x1308,
-- VG_USERREQ__MEMPOOL_CHANGE = 0x1309,
-- VG_USERREQ__MEMPOOL_EXISTS = 0x130a,
--
-- /* Allow printfs to valgrind log. */
-- /* The first two pass the va_list argument by value, which
-- assumes it is the same size as or smaller than a UWord,
-- which generally isn't the case. Hence are deprecated.
-- The second two pass the vargs by reference and so are
-- immune to this problem. */
-- /* both :: char* fmt, va_list vargs (DEPRECATED) */
-- VG_USERREQ__PRINTF = 0x1401,
-- VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
-- /* both :: char* fmt, va_list* vargs */
-- VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403,
-- VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404,
--
-- /* Stack support. */
-- VG_USERREQ__STACK_REGISTER = 0x1501,
-- VG_USERREQ__STACK_DEREGISTER = 0x1502,
-- VG_USERREQ__STACK_CHANGE = 0x1503,
--
-- /* Wine support */
-- VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601
-- } Vg_ClientRequest;
--
--#if !defined(__GNUC__)
--# define __extension__ /* */
--#endif
--
--/* Returns the number of Valgrinds this code is running under. That
-- is, 0 if running natively, 1 if running under Valgrind, 2 if
-- running under Valgrind which is running under another Valgrind,
-- etc. */
--#define RUNNING_ON_VALGRIND __extension__ \
-- ({unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \
-- VG_USERREQ__RUNNING_ON_VALGRIND, \
-- 0, 0, 0, 0, 0); \
-- _qzz_res; \
-- })
--
--
--/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
-- _qzz_len - 1]. Useful if you are debugging a JITter or some such,
-- since it provides a way to make sure valgrind will retranslate the
-- invalidated area. Returns no value. */
--#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__DISCARD_TRANSLATIONS, \
-- _qzz_addr, _qzz_len, 0, 0, 0); \
-- }
--
--
--/* These requests are for getting Valgrind itself to print something.
-- Possibly with a backtrace. This is a really ugly hack. The return value
-- is the number of characters printed, excluding the "**<pid>** " part at the
-- start and the backtrace (if present). */
--
--#if defined(NVALGRIND)
--
--# define VALGRIND_PRINTF(...)
--# define VALGRIND_PRINTF_BACKTRACE(...)
--
--#else /* NVALGRIND */
--
--/* Modern GCC will optimize the static routine out if unused,
-- and unused attribute will shut down warnings about it. */
--static int VALGRIND_PRINTF(const char *format, ...)
-- __attribute__((format(__printf__, 1, 2), __unused__));
--static int
--VALGRIND_PRINTF(const char *format, ...)
--{
-- unsigned long _qzz_res;
-- va_list vargs;
-- va_start(vargs, format);
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
-- VG_USERREQ__PRINTF_VALIST_BY_REF,
-- (unsigned long)format,
-- (unsigned long)&vargs,
-- 0, 0, 0);
-- va_end(vargs);
-- return (int)_qzz_res;
--}
--
--static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
-- __attribute__((format(__printf__, 1, 2), __unused__));
--static int
--VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
--{
-- unsigned long _qzz_res;
-- va_list vargs;
-- va_start(vargs, format);
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
-- VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
-- (unsigned long)format,
-- (unsigned long)&vargs,
-- 0, 0, 0);
-- va_end(vargs);
-- return (int)_qzz_res;
--}
--
--#endif /* NVALGRIND */
--
--
--/* These requests allow control to move from the simulated CPU to the
-- real CPU, calling an arbitary function.
--
-- Note that the current ThreadId is inserted as the first argument.
-- So this call:
--
-- VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
--
-- requires f to have this signature:
--
-- Word f(Word tid, Word arg1, Word arg2)
--
-- where "Word" is a word-sized type.
--
-- Note that these client requests are not entirely reliable. For example,
-- if you call a function with them that subsequently calls printf(),
-- there's a high chance Valgrind will crash. Generally, your prospects of
-- these working are made higher if the called function does not refer to
-- any global variables, and does not refer to any libc or other functions
-- (printf et al). Any kind of entanglement with libc or dynamic linking is
-- likely to have a bad outcome, for tricky reasons which we've grappled
-- with a lot in the past.
--*/
--#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \
-- __extension__ \
-- ({unsigned long _qyy_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
-- VG_USERREQ__CLIENT_CALL0, \
-- _qyy_fn, \
-- 0, 0, 0, 0); \
-- _qyy_res; \
-- })
--
--#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \
-- __extension__ \
-- ({unsigned long _qyy_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
-- VG_USERREQ__CLIENT_CALL1, \
-- _qyy_fn, \
-- _qyy_arg1, 0, 0, 0); \
-- _qyy_res; \
-- })
--
--#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \
-- __extension__ \
-- ({unsigned long _qyy_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
-- VG_USERREQ__CLIENT_CALL2, \
-- _qyy_fn, \
-- _qyy_arg1, _qyy_arg2, 0, 0); \
-- _qyy_res; \
-- })
--
--#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
-- __extension__ \
-- ({unsigned long _qyy_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
-- VG_USERREQ__CLIENT_CALL3, \
-- _qyy_fn, \
-- _qyy_arg1, _qyy_arg2, \
-- _qyy_arg3, 0); \
-- _qyy_res; \
-- })
--
--
--/* Counts the number of errors that have been recorded by a tool. Nb:
-- the tool must record the errors with VG_(maybe_record_error)() or
-- VG_(unique_error)() for them to be counted. */
--#define VALGRIND_COUNT_ERRORS \
-- __extension__ \
-- ({unsigned int _qyy_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
-- VG_USERREQ__COUNT_ERRORS, \
-- 0, 0, 0, 0, 0); \
-- _qyy_res; \
-- })
--
--/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing
-- when heap blocks are allocated in order to give accurate results. This
-- happens automatically for the standard allocator functions such as
-- malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete,
-- delete[], etc.
--
-- But if your program uses a custom allocator, this doesn't automatically
-- happen, and Valgrind will not do as well. For example, if you allocate
-- superblocks with mmap() and then allocates chunks of the superblocks, all
-- Valgrind's observations will be at the mmap() level and it won't know that
-- the chunks should be considered separate entities. In Memcheck's case,
-- that means you probably won't get heap block overrun detection (because
-- there won't be redzones marked as unaddressable) and you definitely won't
-- get any leak detection.
--
-- The following client requests allow a custom allocator to be annotated so
-- that it can be handled accurately by Valgrind.
--
-- VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated
-- by a malloc()-like function. For Memcheck (an illustrative case), this
-- does two things:
--
-- - It records that the block has been allocated. This means any addresses
-- within the block mentioned in error messages will be
-- identified as belonging to the block. It also means that if the block
-- isn't freed it will be detected by the leak checker.
--
-- - It marks the block as being addressable and undefined (if 'is_zeroed' is
-- not set), or addressable and defined (if 'is_zeroed' is set). This
-- controls how accesses to the block by the program are handled.
--
-- 'addr' is the start of the usable block (ie. after any
-- redzone), 'sizeB' is its size. 'rzB' is the redzone size if the allocator
-- can apply redzones -- these are blocks of padding at the start and end of
-- each block. Adding redzones is recommended as it makes it much more likely
-- Valgrind will spot block overruns. `is_zeroed' indicates if the memory is
-- zeroed (or filled with another predictable value), as is the case for
-- calloc().
--
-- VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a
-- heap block -- that will be used by the client program -- is allocated.
-- It's best to put it at the outermost level of the allocator if possible;
-- for example, if you have a function my_alloc() which calls
-- internal_alloc(), and the client request is put inside internal_alloc(),
-- stack traces relating to the heap block will contain entries for both
-- my_alloc() and internal_alloc(), which is probably not what you want.
--
-- For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out
-- custom blocks from within a heap block, B, that has been allocated with
-- malloc/calloc/new/etc, then block B will be *ignored* during leak-checking
-- -- the custom blocks will take precedence.
--
-- VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For
-- Memcheck, it does two things:
--
-- - It records that the block has been deallocated. This assumes that the
-- block was annotated as having been allocated via
-- VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued.
--
-- - It marks the block as being unaddressable.
--
-- VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a
-- heap block is deallocated.
--
-- In many cases, these two client requests will not be enough to get your
-- allocator working well with Memcheck. More specifically, if your allocator
-- writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call
-- will be necessary to mark the memory as addressable just before the zeroing
-- occurs, otherwise you'll get a lot of invalid write errors. For example,
-- you'll need to do this if your allocator recycles freed blocks, but it
-- zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK).
-- Alternatively, if your allocator reuses freed blocks for allocator-internal
-- data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary.
--
-- Really, what's happening is a blurring of the lines between the client
-- program and the allocator... after VALGRIND_FREELIKE_BLOCK is called, the
-- memory should be considered unaddressable to the client program, but the
-- allocator knows more than the rest of the client program and so may be able
-- to safely access it. Extra client requests are necessary for Valgrind to
-- understand the distinction between the allocator and the rest of the
-- program.
--
-- Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request; it
-- has to be emulated with MALLOCLIKE/FREELIKE and memory copying.
--
-- Ignored if addr == 0.
--*/
--#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MALLOCLIKE_BLOCK, \
-- addr, sizeB, rzB, is_zeroed, 0); \
-- }
--
--/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
-- Ignored if addr == 0.
--*/
--#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__FREELIKE_BLOCK, \
-- addr, rzB, 0, 0, 0); \
-- }
--
--/* Create a memory pool. */
--#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__CREATE_MEMPOOL, \
-- pool, rzB, is_zeroed, 0, 0); \
-- }
--
--/* Destroy a memory pool. */
--#define VALGRIND_DESTROY_MEMPOOL(pool) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__DESTROY_MEMPOOL, \
-- pool, 0, 0, 0, 0); \
-- }
--
--/* Associate a piece of memory with a memory pool. */
--#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MEMPOOL_ALLOC, \
-- pool, addr, size, 0, 0); \
-- }
--
--/* Disassociate a piece of memory from a memory pool. */
--#define VALGRIND_MEMPOOL_FREE(pool, addr) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MEMPOOL_FREE, \
-- pool, addr, 0, 0, 0); \
-- }
--
--/* Disassociate any pieces outside a particular range. */
--#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MEMPOOL_TRIM, \
-- pool, addr, size, 0, 0); \
-- }
--
--/* Resize and/or move a piece associated with a memory pool. */
--#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MOVE_MEMPOOL, \
-- poolA, poolB, 0, 0, 0); \
-- }
--
--/* Resize and/or move a piece associated with a memory pool. */
--#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MEMPOOL_CHANGE, \
-- pool, addrA, addrB, size, 0); \
-- }
--
--/* Return 1 if a mempool exists, else 0. */
--#define VALGRIND_MEMPOOL_EXISTS(pool) \
-- __extension__ \
-- ({unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__MEMPOOL_EXISTS, \
-- pool, 0, 0, 0, 0); \
-- _qzz_res; \
-- })
--
--/* Mark a piece of memory as being a stack. Returns a stack id. */
--#define VALGRIND_STACK_REGISTER(start, end) \
-- __extension__ \
-- ({unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__STACK_REGISTER, \
-- start, end, 0, 0, 0); \
-- _qzz_res; \
-- })
--
--/* Unmark the piece of memory associated with a stack id as being a
-- stack. */
--#define VALGRIND_STACK_DEREGISTER(id) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__STACK_DEREGISTER, \
-- id, 0, 0, 0, 0); \
-- }
--
--/* Change the start and end address of the stack id. */
--#define VALGRIND_STACK_CHANGE(id, start, end) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__STACK_CHANGE, \
-- id, start, end, 0, 0); \
-- }
--
--/* Load PDB debug info for Wine PE image_map. */
--#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \
-- {unsigned int _qzz_res; \
-- VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
-- VG_USERREQ__LOAD_PDB_DEBUGINFO, \
-- fd, ptr, total_size, delta, 0); \
-- }
--
--
--#undef PLAT_x86_linux
--#undef PLAT_amd64_linux
--#undef PLAT_ppc32_linux
--#undef PLAT_ppc64_linux
--#undef PLAT_arm_linux
--#undef PLAT_ppc32_aix5
--#undef PLAT_ppc64_aix5
--
--#endif /* __VALGRIND_H */
-diff --git a/t/00.re2-tests.t b/t/00.re2-tests.t
-deleted file mode 100644
-index e81a6df..0000000
---- a/t/00.re2-tests.t
-+++ /dev/null
-@@ -1,35 +0,0 @@
--use Test::More;
--no warnings;
--
--# Avoid running for everyone, these tests are quite slow, but useful for CPAN
--# testers to run. cpanminus sets automated testing when it isn't, so ignore
--# in that case too.
--plan skip_all => "Only for automated tests"
-- unless $ENV{AUTOMATED_TESTING} and not $ENV{PERL5_CPANM_IS_RUNNING};
--
--my $make = $ENV{MAKE} || "make";
--
--my @results = qx{$make re2-tests};
--
--for(@results) {
-- if(my($test, $result, $diag) = $_ =~ /^(obj[^ ]+)\s+(PASS|FAIL)(.*)/) {
-- if($result eq 'PASS') {
-- pass $test;
-- } else {
-- fail "$test$diag";
--
-- if($diag =~ /output in (.*)/) {
-- my $file = "re2/$1";
-- open my $log_fh, "<", $file or do { diag "$file: $!"; next };
-- diag <$log_fh>;
-- }
-- }
-- }
--}
--
--if("@results" !~ /PASS|FAIL/) {
-- diag @results;
-- plan skip_all => "Unable to compile RE2 tests for some reason";
--}
--
--done_testing;
--
1.7.1
More information about the scm-commits
mailing list