remove to_regexp dependency and the warnings it causes (#128)

grosser · Gergely Brautigam · web-flow · commit 7ec50e623c29 · 2020-11-18T09:35:40.000+01:00
Co-authored-by: Gergely Brautigam <gergely@arangodb.com>
diff --git a/jsonpath.gemspec b/jsonpath.gemspec
@@ -5,7 +5,6 @@ require File.join(File.dirname(__FILE__), 'lib', 'jsonpath', 'version')
 Gem::Specification.new do |s|
   s.name = 'jsonpath'
   s.version = JsonPath::VERSION
-  s.required_rubygems_version = Gem::Requirement.new('>= 0')
   s.required_ruby_version = '>= 2.5'
   s.authors = ['Joshua Hull', 'Gergely Brautigam']
   s.summary = 'Ruby implementation of http://goessner.net/articles/JsonPath/'
@@ -14,17 +13,12 @@ Gem::Specification.new do |s|
   s.extra_rdoc_files = ['README.md']
   s.files = `git ls-files`.split("\n")
   s.homepage = 'https://github.com/joshbuddy/jsonpath'
-  s.rdoc_options = ['--charset=UTF-8']
-  s.require_paths = ['lib']
-  s.rubygems_version = '1.3.7'
   s.test_files = `git ls-files`.split("\n").select { |f| f =~ /^spec/ }
-  s.rubyforge_project = 'jsonpath'
   s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
   s.licenses    = ['MIT']
 
   # dependencies
   s.add_runtime_dependency 'multi_json'
-  s.add_runtime_dependency 'to_regexp', '~> 0.2.1'
   s.add_development_dependency 'bundler'
   s.add_development_dependency 'code_stats'
   s.add_development_dependency 'minitest', '~> 2.2.0'
diff --git a/lib/jsonpath/parser.rb b/lib/jsonpath/parser.rb
@@ -1,11 +1,12 @@
 # frozen_string_literal: true
 
 require 'strscan'
-require 'to_regexp'
 
 class JsonPath
   # Parser parses and evaluates an expression passed to @_current_node.
   class Parser
+    REGEX = /\A\/(.+)\/([imxnesu]*)\z|\A%r{(.+)}([imxnesu]*)\z/
+
     def initialize(node)
       @_current_node = node
       @_expr_map = {}
@@ -71,13 +72,16 @@ def parse_exp(exp)
         elsif (t = scanner.scan(/(\s+)?'?.*'?(\s+)?/))
           # If we encounter a node which does not contain `'` it means
           #  that we are dealing with a boolean type.
-          operand = if t == 'true'
-                      true
-                    elsif t == 'false'
-                      false
-                    else
-                      operator.to_s.strip == '=~' ? t.to_regexp : t.gsub(%r{^'|'$}, '').strip
-                    end
+          operand =
+            if t == 'true'
+              true
+            elsif t == 'false'
+              false
+            elsif operator.to_s.strip == '=~'
+              parse_regex(t)
+            else
+              t.gsub(%r{^'|'$}, '').strip
+            end
         elsif (t = scanner.scan(/\/\w+\//))
         elsif (t = scanner.scan(/.*/))
           raise "Could not process symbol: #{t}"
@@ -102,6 +106,31 @@ def parse_exp(exp)
 
     private
 
+    # "/foo/i" -> Regexp.new("foo", Regexp::IGNORECASE) without using eval; also supports %r{foo}i.
+    # Raises RuntimeError when t is in neither of those two literal forms.
+    # following https://github.com/seamusabshere/to_regexp/blob/master/lib/to_regexp.rb
+    def parse_regex(t)
+      match = REGEX.match(t)
+      raise "unsupported regex #{t} use /foo/ style" unless match
+
+      content = (match[1] || match[3]).gsub('\\/', '/') # un-escape \/ from the literal
+      options = match[2] || match[4]
+
+      flags = 0
+      flags |= Regexp::IGNORECASE if options.include?('i')
+      flags |= Regexp::MULTILINE if options.include?('m')
+      flags |= Regexp::EXTENDED if options.include?('x')
+
+      # 'n' = none, 'e' = EUC, 's' = SJIS, 'u' = UTF-8; only 'n' (when first) maps to an option.
+      # It is set as a flag because Ruby 3.3 removed the third argument of Regexp.new.
+      if options[/[nes]/] == 'n'
+        flags |= Regexp::NOENCODING
+        content = content.b # binary copy so non-ASCII bytes in the pattern are accepted
+      end
+
+      Regexp.new(content, flags)
+    end
+
     #  This will break down a parenthesis from the left to the right
     #  and replace the given expression with its returned value.
     # It does this in order to make it easy to eliminate groups