From 7473af6d9d2aa43bffa37299e9ddbd033431f5aa Mon Sep 17 00:00:00 2001 From: HoneyryderChuck Date: Wed, 21 Jun 2023 23:09:37 +0100 Subject: [PATCH] removed punycode ruby implementation inherited from domain_name it's IDNA 2003 compliant only, and people can already load idnx optionally. --- .rubocop.yml | 1 - .simplecov | 1 - LICENSE.txt | 48 ------- lib/httpx/domain_name.rb | 15 +- lib/httpx/punycode.rb | 300 ++------------------------------------- 5 files changed, 13 insertions(+), 352 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 96fb97a9..40537e48 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -23,7 +23,6 @@ AllCops: - 'vendor/**/*' - 'www/**/*' - 'lib/httpx/extensions.rb' - - 'lib/httpx/punycode.rb' # Do not lint ffi block, for openssl parity - 'test/extensions/response_pattern_match.rb' diff --git a/.simplecov b/.simplecov index 672c0dfe..80c6eed4 100644 --- a/.simplecov +++ b/.simplecov @@ -6,5 +6,4 @@ SimpleCov.start do add_filter "/integration_tests/" add_filter "/regression_tests/" add_filter "/lib/httpx/plugins/internal_telemetry.rb" - add_filter "/lib/httpx/punycode.rb" end diff --git a/LICENSE.txt b/LICENSE.txt index 6bac983a..9c8e20ab 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -189,51 +189,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - - -* lib/httpx/domain_name.rb - -This file is derived from the implementation of punycode available at -here: - -https://www.verisign.com/en_US/channel-resources/domain-registry-products/idn-sdks/index.xhtml - -Copyright (C) 2000-2002 Verisign Inc., All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - - 1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - 3) Neither the name of the VeriSign Inc. nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -This software is licensed under the BSD open source license. For more -information visit www.opensource.org. - -Authors: - John Colosi (VeriSign) - Srikanth Veeramachaneni (VeriSign) - Nagesh Chigurupati (Verisign) - Praveen Srinivasan(Verisign) \ No newline at end of file diff --git a/lib/httpx/domain_name.rb b/lib/httpx/domain_name.rb index c29e6094..2aa3c055 100644 --- a/lib/httpx/domain_name.rb +++ b/lib/httpx/domain_name.rb @@ -51,8 +51,6 @@ module HTTPX # non-canonical domain. attr_reader :domain - DOT = "." # :nodoc: - class << self def new(domain) return domain if domain.is_a?(self) @@ -73,7 +71,7 @@ module HTTPX def initialize(hostname) hostname = String(hostname) - raise ArgumentError, "domain name must not start with a dot: #{hostname}" if hostname.start_with?(DOT) + raise ArgumentError, "domain name must not start with a dot: #{hostname}" if hostname.start_with?(".") begin @ipaddr = IPAddr.new(hostname) @@ -84,7 +82,7 @@ module HTTPX end @hostname = DomainName.normalize(hostname) - tld = if (last_dot = @hostname.rindex(DOT)) + tld = if (last_dot = @hostname.rindex(".")) @hostname[(last_dot + 1)..-1] else @hostname @@ -94,7 +92,7 @@ module HTTPX @domain = if last_dot # fallback - accept cookies down to second level # cf. http://www.dkim-reputation.org/regdom-libs/ - if (penultimate_dot = @hostname.rindex(DOT, last_dot - 1)) + if (penultimate_dot = @hostname.rindex(".", last_dot - 1)) @hostname[(penultimate_dot + 1)..-1] else @hostname @@ -126,17 +124,12 @@ module HTTPX @domain && self <= domain && domain <= @domain end - # def ==(other) - # other = DomainName.new(other) - # other.hostname == @hostname - # end - def <=>(other) other = DomainName.new(other) othername = other.hostname if othername == @hostname 0 - elsif @hostname.end_with?(othername) && @hostname[-othername.size - 1, 1] == DOT + elsif @hostname.end_with?(othername) && @hostname[-othername.size - 1, 1] == "." # The other is higher -1 else diff --git a/lib/httpx/punycode.rb b/lib/httpx/punycode.rb index c4946d81..42bd0289 100644 --- a/lib/httpx/punycode.rb +++ b/lib/httpx/punycode.rb @@ -1,304 +1,22 @@ # frozen_string_literal: true module HTTPX - begin - require "idnx" + module Punycode + module_function - module Punycode - module_function + begin + require "idnx" def encode_hostname(hostname) Idnx.to_punycode(hostname) end - end - - rescue LoadError - # :nocov: - # -*- coding: utf-8 -*- - #-- - # punycode.rb - PunyCode encoder for the Domain Name library - # - # Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved. - # - # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN - # Library. - # - # Copyright (C) 2000-2002 Verisign Inc., All rights reserved. - # - # Redistribution and use in source and binary forms, with or - # without modification, are permitted provided that the following - # conditions are met: - # - # 1) Redistributions of source code must retain the above copyright - # notice, this list of conditions and the following disclaimer. - # - # 2) Redistributions in binary form must reproduce the above copyright - # notice, this list of conditions and the following disclaimer in - # the documentation and/or other materials provided with the - # distribution. - # - # 3) Neither the name of the VeriSign Inc. nor the names of its - # contributors may be used to endorse or promote products derived - # from this software without specific prior written permission. - # - # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - # POSSIBILITY OF SUCH DAMAGE. - # - # This software is licensed under the BSD open source license. For more - # information visit www.opensource.org. - # - # Authors: - # John Colosi (VeriSign) - # Srikanth Veeramachaneni (VeriSign) - # Nagesh Chigurupati (Verisign) - # Praveen Srinivasan(Verisign) - #++ - module Punycode - BASE = 36 - TMIN = 1 - TMAX = 26 - SKEW = 38 - DAMP = 700 - INITIAL_BIAS = 72 - INITIAL_N = 0x80 - DELIMITER = "-" - - MAXINT = (1 << 32) - 1 - - LOBASE = BASE - TMIN - CUTOFF = LOBASE * TMAX / 2 - - RE_NONBASIC = /[^\x00-\x7f]/.freeze - - # Returns the numeric value of a basic code point (for use in - # representing integers) in the range 0 to base-1, or nil if cp - # is does not represent a value. - DECODE_DIGIT = {}.tap do |map| - # ASCII A..Z map to 0..25 - # ASCII a..z map to 0..25 - (0..25).each { |i| map[65 + i] = map[97 + i] = i } - # ASCII 0..9 map to 26..35 - (26..35).each { |i| map[22 + i] = i } - end - - # Returns the basic code point whose value (when used for - # representing integers) is d, which must be in the range 0 to - # BASE-1. The lowercase form is used unless flag is true, in - # which case the uppercase form is used. The behavior is - # undefined if flag is nonzero and digit d has no uppercase - # form. - ENCODE_DIGIT = proc { |d, flag| - (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr - # 0..25 map to ASCII a..z or A..Z - # 26..35 map to ASCII 0..9 - } - - DOT = "." - PREFIX = "xn--" - - # Most errors we raise are basically kind of ArgumentError. - class ArgumentError < ::ArgumentError; end - class BufferOverflowError < ArgumentError; end - - module_function - - # Encode a +string+ in Punycode - def encode(string) - input = string.unpack("U*") - output = +"" - - # Initialize the state - n = INITIAL_N - delta = 0 - bias = INITIAL_BIAS - - # Handle the basic code points - input.each { |cp| output << cp.chr if cp < 0x80 } - - h = b = output.length - - # h is the number of code points that have been handled, b is the - # number of basic code points, and out is the number of characters - # that have been output. - - output << DELIMITER if b > 0 - - # Main encoding loop - - while h < input.length - # All non-basic code points < n have been handled already. Find - # the next larger one - - m = MAXINT - input.each do |cp| - m = cp if (n...m) === cp - end - - # Increase delta enough to advance the decoder's state to - # , but guard against overflow - - delta += (m - n) * (h + 1) - raise BufferOverflowError if delta > MAXINT - - n = m - - input.each do |cp| - # AMC-ACE-Z can use this simplified version instead - if cp < n - delta += 1 - raise BufferOverflowError if delta > MAXINT - elsif cp == n - # Represent delta as a generalized variable-length integer - q = delta - k = BASE - loop do - t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias - break if q < t - - q, r = (q - t).divmod(BASE - t) - output << ENCODE_DIGIT[t + r, false] - k += BASE - end - - output << ENCODE_DIGIT[q, false] - - # Adapt the bias - delta = h == b ? delta / DAMP : delta >> 1 - delta += delta / (h + 1) - bias = 0 - while delta > CUTOFF - delta /= LOBASE - bias += BASE - end - bias += (LOBASE + 1) * delta / (delta + SKEW) - - delta = 0 - h += 1 - end - end - - delta += 1 - n += 1 - end - - output - end - - # Encode a hostname using IDN/Punycode algorithms + rescue LoadError def encode_hostname(hostname) - hostname.match(RE_NONBASIC) || (return hostname) + warn "#{hostname} cannot be converted to punycode. Install the " \ + "\"idnx\" gem: https://github.com/HoneyryderChuck/idnx" - hostname.split(DOT).map do |name| - if name.match(RE_NONBASIC) - PREFIX + encode(name) - else - name - end - end.join(DOT) - end - - # Decode a +string+ encoded in Punycode - def decode(string) - # Initialize the state - n = INITIAL_N - i = 0 - bias = INITIAL_BIAS - - if j = string.rindex(DELIMITER) - b = string[0...j] - - b.match(RE_NONBASIC) && - raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}") - - # Handle the basic code points - - output = b.unpack("U*") - u = string[(j + 1)..-1] - else - output = [] - u = string - end - - # Main decoding loop: Start just after the last delimiter if any - # basic code points were copied; start at the beginning - # otherwise. - - input = u.unpack("C*") - input_length = input.length - h = 0 - out = output.length - - while h < input_length - # Decode a generalized variable-length integer into delta, - # which gets added to i. The overflow checking is easier - # if we increase i as we go, then subtract off its starting - # value at the end to obtain delta. - - oldi = i - w = 1 - k = BASE - - loop do - (digit = DECODE_DIGIT[input[h]]) || - raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}") - h += 1 - i += digit * w - raise BufferOverflowError if i > MAXINT - - t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias - break if digit < t - - w *= BASE - t - raise BufferOverflowError if w > MAXINT - - k += BASE - (h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}") - end - - # Adapt the bias - delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1 - delta += delta / (out + 1) - bias = 0 - while delta > CUTOFF - delta /= LOBASE - bias += BASE - end - bias += (LOBASE + 1) * delta / (delta + SKEW) - - # i was supposed to wrap around from out+1 to 0, incrementing - # n each time, so we'll fix that now: - - q, i = i.divmod(out + 1) - n += q - raise BufferOverflowError if n > MAXINT - - # Insert n at position i of the output: - - output[i, 0] = n - - out += 1 - i += 1 - end - output.pack("U*") - end - - # Decode a hostname using IDN/Punycode algorithms - def decode_hostname(hostname) - hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do - Regexp.last_match(1) << decode(Regexp.last_match(2)) - end + hostname end end - # :nocov: end -end \ No newline at end of file +end