From db939b56d28a0f4cd46b970f337cb7a172bbd05d Mon Sep 17 00:00:00 2001
From: HoneyryderChuck
Date: Wed, 28 Dec 2022 17:28:50 +0000
Subject: [PATCH] Adding HTTPX::Response::Body#filename

This returns the filename advertised in the content-disposition header.
It reuses the same logic which existed for parsing multipart responses,
which itself was based on `rack`'s.
---
Review notes (ignored by git-am, not part of the commit message):
- Utils.get_filename no longer raises ArgumentError when the filename*
  parameter names a charset ruby does not know; the filename is returned
  without being re-tagged instead. A test case covers it.
- sig/utils.rbs now declares the optional _prefix_regex argument that the
  ruby method accepts.

 lib/httpx/plugins/multipart/decoder.rb | 64 +++----------------------
 lib/httpx/response.rb                  |  6 +++
 lib/httpx/utils.rb                     | 44 +++++++++++++++++
 sig/plugins/multipart.rbs              |  2 -
 sig/response.rbs                       |  2 +
 sig/utils.rbs                          |  2 +
 test/response_test.rb                  | 30 ++++++++++++
 7 files changed, 91 insertions(+), 59 deletions(-)

diff --git a/lib/httpx/plugins/multipart/decoder.rb b/lib/httpx/plugins/multipart/decoder.rb
index 37bb8ecb..06fd7775 100644
--- a/lib/httpx/plugins/multipart/decoder.rb
+++ b/lib/httpx/plugins/multipart/decoder.rb
@@ -5,10 +5,6 @@ require "delegate"
 
 module HTTPX::Plugins
   module Multipart
-    using HTTPX::RegexpExtensions unless Regexp.method_defined?(:match?)
-
-    CRLF = "\r\n"
-
     class FilePart < SimpleDelegator
       attr_reader :original_filename, :content_type
 
@@ -20,32 +16,14 @@ module HTTPX::Plugins
       end
     end
 
-    TOKEN = %r{[^\s()<>,;:\\"/\[\]?=]+}.freeze
-    VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/.freeze
-    CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i.freeze
-    BROKEN_QUOTED = /^#{CONDISP}.*;\s*filename="(.*?)"(?:\s*$|\s*;\s*#{TOKEN}=)/i.freeze
-    BROKEN_UNQUOTED = /^#{CONDISP}.*;\s*filename=(#{TOKEN})/i.freeze
-    MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{CRLF}/ni.freeze
-    MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni.freeze
-    MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{CRLF}]*)/ni.freeze
-    # Updated definitions from RFC 2231
-    ATTRIBUTE_CHAR = %r{[^ \t\v\n\r)(><@,;:\\"/\[\]?='*%]}.freeze
-    ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/.freeze
-    SECTION = /\*[0-9]+/.freeze
-    REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/.freeze
-    REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/.freeze
-    EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/.freeze
-    EXTENDED_OTHER_VALUE = /%[0-9a-fA-F]{2}|#{ATTRIBUTE_CHAR}/.freeze
-    EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/.freeze
-    EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/.freeze
-    EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9-]*'[a-zA-Z0-9-]*'#{EXTENDED_OTHER_VALUE}*/.freeze
-    EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/.freeze
-    EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/.freeze
-    DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/.freeze
-    RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i.freeze
-
     class Decoder
+      include HTTPX::Utils
+
+      CRLF = "\r\n"
       BOUNDARY_RE = /;\s*boundary=([^;]+)/i.freeze
+      MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{CRLF}/ni.freeze
+      MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni.freeze
+      MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{CRLF}]*)/ni.freeze
       WINDOW_SIZE = 2 << 14
 
       def initialize(response)
@@ -102,7 +80,7 @@ module HTTPX::Plugins
             name = head[MULTIPART_CONTENT_ID, 1]
           end
 
-          filename = get_filename(head)
+          filename = HTTPX::Utils.get_filename(head)
 
           name = filename || +"#{content_type || "text/plain"}[]" if name.nil? || name.empty?
 
@@ -154,34 +132,6 @@ module HTTPX::Plugins
           raise Error, "parsing should have been over by now"
         end until @buffer.empty?
       end
-
-      def get_filename(head)
-        filename = nil
-        case head
-        when RFC2183
-          params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]
-
-          if (filename = params["filename"])
-            filename = Regexp.last_match(1) if filename =~ /^"(.*)"$/
-          elsif (filename = params["filename*"])
-            encoding, _, filename = filename.split("'", 3)
-          end
-        when BROKEN_QUOTED, BROKEN_UNQUOTED
-          filename = Regexp.last_match(1)
-        end
-
-        return unless filename
-
-        filename = URI::DEFAULT_PARSER.unescape(filename) if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
-
-        filename.scrub!
-
-        filename = filename.gsub(/\\(.)/, '\1') unless /\\[^\\"]/.match?(filename)
-
-        filename.force_encoding ::Encoding.find(encoding) if encoding
-
-        filename
-      end
     end
   end
 end
diff --git a/lib/httpx/response.rb b/lib/httpx/response.rb
index e32f137e..2b02a061 100644
--- a/lib/httpx/response.rb
+++ b/lib/httpx/response.rb
@@ -181,6 +181,12 @@ module HTTPX
         end
       end
 
+      def filename
+        return unless @headers.key?("content-disposition")
+
+        Utils.get_filename(@headers["content-disposition"])
+      end
+
       def to_s
         case @buffer
         when StringIO
diff --git a/lib/httpx/utils.rb b/lib/httpx/utils.rb
index 969f93f4..fc982e40 100644
--- a/lib/httpx/utils.rb
+++ b/lib/httpx/utils.rb
@@ -3,6 +3,12 @@
 module HTTPX
   module Utils
     using URIExtensions
+    using HTTPX::RegexpExtensions unless Regexp.method_defined?(:match?)
+
+    TOKEN = %r{[^\s()<>,;:\\"/\[\]?=]+}.freeze
+    VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/.freeze
+    FILENAME_REGEX = /\s*filename=(#{VALUE})/.freeze
+    FILENAME_EXTENSION_REGEX = /\s*filename\*=(#{VALUE})/.freeze
 
     module_function
 
@@ -25,6 +31,44 @@ module HTTPX
       time - Time.now
     end
 
+    # Extracts the filename advertised in a content-disposition header value.
+    #
+    # The plain +filename+ parameter is tried first, then the extended
+    # +filename*+ one, whose value has the <tt>charset'language'value</tt> form.
+    # Returns +nil+ when +header+ carries neither. +_prefix_regex+ is ignored.
+    def get_filename(header, _prefix_regex = nil)
+      filename = nil
+      case header
+      when FILENAME_REGEX
+        filename = Regexp.last_match(1)
+        filename = Regexp.last_match(1) if filename =~ /^"(.*)"$/
+      when FILENAME_EXTENSION_REGEX
+        filename = Regexp.last_match(1)
+        encoding, _, filename = filename.split("'", 3)
+      end
+
+      return unless filename
+
+      filename = URI::DEFAULT_PARSER.unescape(filename) if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
+
+      filename.scrub!
+
+      filename = filename.gsub(/\\(.)/, '\1') unless /\\[^\\"]/.match?(filename)
+
+      if encoding
+        # the charset is chosen by the peer: keep the bytes as they are when ruby
+        # does not know it, instead of letting Encoding.find raise ArgumentError.
+        begin
+          charset = ::Encoding.find(encoding)
+        rescue ArgumentError
+          charset = nil
+        end
+        filename.force_encoding(charset) if charset
+      end
+
+      filename
+    end
+
     if RUBY_VERSION < "2.3"
 
       def to_uri(uri)
diff --git a/sig/plugins/multipart.rbs b/sig/plugins/multipart.rbs
index be7d5867..de2b375c 100644
--- a/sig/plugins/multipart.rbs
+++ b/sig/plugins/multipart.rbs
@@ -68,8 +68,6 @@ module HTTPX
         def initialize: (Response response) -> void
 
         def parse: () -> void
-
-        def get_filename: (String head) -> String?
       end
 
       class FilePart # < SimpleDelegator
diff --git a/sig/response.rbs b/sig/response.rbs
index dc845e99..4e19a8c1 100644
--- a/sig/response.rbs
+++ b/sig/response.rbs
@@ -64,6 +64,8 @@ module HTTPX
       def each: () { (String) -> void } -> void
               | () -> Enumerable[String]
 
+      def filename: () -> String?
+
       def bytesize: () -> (Integer | Float)
       def empty?: () -> bool
       def copy_to: (String | File | _Writer destination) -> void
diff --git a/sig/utils.rbs b/sig/utils.rbs
index c39aa30e..378adcb2 100644
--- a/sig/utils.rbs
+++ b/sig/utils.rbs
@@ -9,5 +9,7 @@ module HTTPX
     def self?.elapsed_time: (Integer | Float monotonic_time) -> Float
 
     def self?.to_uri: (generic_uri uri) -> URI::Generic
+
+    def self?.get_filename: (String header, ?Regexp? _prefix_regex) -> String?
   end
 end
\ No newline at end of file
diff --git a/test/response_test.rb b/test/response_test.rb
index 0c669b77..3e043a12 100644
--- a/test/response_test.rb
+++ b/test/response_test.rb
@@ -138,6 +138,36 @@ class ResponseTest < Minitest::Test
     assert body.buffer.is_a?(Tempfile), "body should buffer to file after going over threshold"
   end
 
+  def test_response_body_filename
+    body = Response::Body.new(Response.new(request, 200, "2.0", {}), Options.new)
+    assert body.filename.nil?
+    body = Response::Body.new(Response.new(request, 200, "2.0", { "content-disposition" => "attachment;filename=test.csv" }), Options.new)
+    assert body.filename == "test.csv"
+    body = Response::Body.new(Response.new(request, 200, "2.0", { "content-disposition" => "attachment;filename=\"test.csv\"" }),
+                              Options.new)
+    assert body.filename == "test.csv"
+    body = Response::Body.new(Response.new(request, 200, "2.0", {
+                                             "content-disposition" => "inline; filename=ER886357.pdf; " \
+                                                                      "creation-date=9/17/2012 1:51:37 PM; " \
+                                                                      "modification-date=9/17/2012 1:51:37 PM; size=3718678",
+                                           }),
+                              Options.new)
+    assert body.filename == "ER886357.pdf"
+
+    body = Response::Body.new(Response.new(request, 200, "2.0", { "content-disposition" => "attachment; filename*=UTF-8''bar" }),
+                              Options.new)
+    assert body.filename == "bar"
+    body = Response::Body.new(Response.new(request, 200, "2.0", { "content-disposition" => "attachment; filename*=x-unknown''bar" }),
+                              Options.new)
+    assert body.filename == "bar"
+    body = Response::Body.new(Response.new(request, 200, "2.0", {
+                                             "content-disposition" => "inline; filename*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates.pdf",
+                                           }),
+                              Options.new)
+
+    assert body.filename == "£ and € rates.pdf"
+  end
+
   def test_response_decoders
     json_response = Response.new(request, 200, "2.0", { "content-type" => "application/json" })
     json_response << %({"a": "b"})