Adding HTTPX::Response::Body#filename

This returns the filename advertised in the `content-disposition` response header.
It reuses the logic that already existed for parsing multipart responses,
which was itself based on `rack`'s implementation.
This commit is contained in:
HoneyryderChuck 2022-12-28 17:28:50 +00:00
parent 9c765385a5
commit db939b56d2
7 changed files with 74 additions and 59 deletions

View File

@ -5,10 +5,6 @@ require "delegate"
module HTTPX::Plugins
module Multipart
using HTTPX::RegexpExtensions unless Regexp.method_defined?(:match?)
CRLF = "\r\n"
class FilePart < SimpleDelegator
attr_reader :original_filename, :content_type
@ -20,32 +16,14 @@ module HTTPX::Plugins
end
end
# Building blocks for recognising the Content-Disposition line of a part header.
# TOKEN: a run of characters that are neither whitespace nor one of the listed separators.
TOKEN = %r{[^\s()<>,;:\\"/\[\]?=]+}.freeze
# VALUE: either a double-quoted string (which may contain \" escapes) or a bare TOKEN.
VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/.freeze
# CONDISP: the "Content-Disposition:" field name followed by a single TOKEN (case-insensitive).
CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i.freeze
# Lenient patterns capturing a quoted / unquoted filename= parameter; get_filename
# falls back to them when the stricter RFC2183 pattern below does not match.
BROKEN_QUOTED = /^#{CONDISP}.*;\s*filename="(.*?)"(?:\s*$|\s*;\s*#{TOKEN}=)/i.freeze
BROKEN_UNQUOTED = /^#{CONDISP}.*;\s*filename=(#{TOKEN})/i.freeze
# Capture, respectively: the Content-Type value up to the CRLF, the name= parameter of a
# Content-Disposition line, and the Content-ID value up to the first CR or LF.
# The "n" flag compiles them without a specific encoding; "i" makes them case-insensitive.
MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{CRLF}/ni.freeze
MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni.freeze
MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{CRLF}]*)/ni.freeze
# Updated definitions from RFC 2231
# ATTRIBUTE_CHAR: any character except whitespace and the listed special characters.
ATTRIBUTE_CHAR = %r{[^ \t\v\n\r)(><@,;:\\"/\[\]?='*%]}.freeze
# ATTRIBUTE: a parameter name, i.e. one or more ATTRIBUTE_CHARs.
ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/.freeze
# SECTION: a "*<digits>" suffix on a parameter name.
SECTION = /\*[0-9]+/.freeze
# Regular parameter: name (with optional section suffix) = VALUE; captures name and value.
REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/.freeze
REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/.freeze
# Extended parameter, non-initial section: "name*N*" (N >= 1, no leading zero) whose value
# is made of %XX escapes and ATTRIBUTE_CHARs; captures name and value.
EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/.freeze
EXTENDED_OTHER_VALUE = /%[0-9a-fA-F]{2}|#{ATTRIBUTE_CHAR}/.freeze
EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/.freeze
# Extended parameter, initial section: "name*" or "name*0*" whose value has two
# single-quote-delimited prefix fields followed by the encoded value
# (get_filename treats the first field as the encoding name); captures name and value.
EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/.freeze
EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9-]*'[a-zA-Z0-9-]*'#{EXTENDED_OTHER_VALUE}*/.freeze
EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/.freeze
EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/.freeze
# DISPPARM: one ";"-introduced parameter, regular or extended, with optional surrounding whitespace.
DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/.freeze
# RFC2183: a whole Content-Disposition line made of CONDISP followed by one or more DISPPARMs.
RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i.freeze
class Decoder
include HTTPX::Utils
# Line terminator interpolated into the header patterns below.
CRLF = "\r\n"
# Captures the boundary= parameter value, i.e. everything up to the next ";" (case-insensitive).
BOUNDARY_RE = /;\s*boundary=([^;]+)/i.freeze
# Capture, respectively: the Content-Type value up to the CRLF, the name= parameter of a
# Content-Disposition line, and the Content-ID value up to the first CR or LF.
MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{CRLF}/ni.freeze
MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni.freeze
MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{CRLF}]*)/ni.freeze
# 2 << 14 == 32_768.
# NOTE(review): presumably a buffer/read window size in bytes; its use is outside this view — confirm.
WINDOW_SIZE = 2 << 14
def initialize(response)
@ -102,7 +80,7 @@ module HTTPX::Plugins
name = head[MULTIPART_CONTENT_ID, 1]
end
filename = get_filename(head)
filename = HTTPX::Utils.get_filename(head)
name = filename || +"#{content_type || "text/plain"}[]" if name.nil? || name.empty?
@ -154,34 +132,6 @@ module HTTPX::Plugins
raise Error, "parsing should have been over by now"
end until @buffer.empty?
end
# Extracts the filename advertised by the Content-Disposition line found in +head+.
#
# A well-formed line (RFC2183) is split into its parameters, preferring
# "filename" over the extended "filename*" form; otherwise the lenient
# BROKEN_QUOTED / BROKEN_UNQUOTED patterns are tried.
#
# Returns the decoded filename, or nil when none could be found.
def get_filename(head)
  charset = nil

  raw_name =
    case head
    when RFC2183
      params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]
      if (plain = params["filename"])
        # drop the surrounding double quotes of a quoted value
        plain[/^"(.*)"$/, 1] || plain
      elsif (extended = params["filename*"])
        # extended form: <encoding>'<ignored field>'<value>
        charset, _, ext_value = extended.split("'", 3)
        ext_value
      end
    when BROKEN_QUOTED, BROKEN_UNQUOTED
      Regexp.last_match(1)
    end

  return unless raw_name

  # percent-decode only when every "%" starts a well-formed %XX escape
  escapes = raw_name.scan(/%.?.?/)
  well_formed = escapes.all? { |seq| /%[0-9a-fA-F]{2}/.match?(seq) }
  name = well_formed ? URI::DEFAULT_PARSER.unescape(raw_name) : raw_name

  name.scrub!

  # undo backslash escapes, unless a backslash precedes something other than \ or "
  name = name.gsub(/\\(.)/, '\1') unless /\\[^\\"]/.match?(name)

  name.force_encoding(::Encoding.find(charset)) if charset
  name
end
end
end
end

View File

@ -181,6 +181,12 @@ module HTTPX
end
end
# Returns the filename advertised in the "content-disposition" header,
# as extracted by Utils.get_filename, or nil when that header is absent.
def filename
  if @headers.key?("content-disposition")
    Utils.get_filename(@headers["content-disposition"])
  end
end
def to_s
case @buffer
when StringIO

View File

@ -3,6 +3,12 @@
module HTTPX
module Utils
using URIExtensions
using HTTPX::RegexpExtensions unless Regexp.method_defined?(:match?)
# TOKEN: a run of characters that are neither whitespace nor one of the listed separators.
TOKEN = %r{[^\s()<>,;:\\"/\[\]?=]+}.freeze
# VALUE: either a double-quoted string (which may contain \" escapes) or a bare TOKEN.
VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/.freeze
# Captures the VALUE of a plain "filename=" parameter.
FILENAME_REGEX = /\s*filename=(#{VALUE})/.freeze
# Captures the VALUE of an extended "filename*=" parameter.
FILENAME_EXTENSION_REGEX = /\s*filename\*=(#{VALUE})/.freeze
module_function
@ -25,6 +31,30 @@ module HTTPX
time - Time.now
end
# Extracts the filename advertised by a Content-Disposition header value.
#
# A plain "filename=" parameter takes precedence; when there is none, the
# extended "filename*=<charset>'<ignored field>'<value>" form is used.
#
# header        - the String to inspect.
# _prefix_regex - unused; kept so existing two-argument calls keep working.
#
# Returns the decoded filename String, or nil when no filename is advertised.
# An empty or unknown charset in the extended form is ignored instead of
# raising, since the header comes from the remote peer.
def get_filename(header, _prefix_regex = nil)
  filename = nil
  encoding = nil

  case header
  when FILENAME_REGEX
    filename = Regexp.last_match(1)
    # drop the surrounding double quotes of a quoted value
    filename = Regexp.last_match(1) if filename =~ /^"(.*)"$/
  when FILENAME_EXTENSION_REGEX
    charset, _, filename = Regexp.last_match(1).split("'", 3)
    encoding = begin
      ::Encoding.find(charset) unless charset.nil? || charset.empty?
    rescue ArgumentError
      # unknown charset label: keep the string's current encoding
      nil
    end
  end

  return unless filename

  # percent-decode only when every "%" starts a well-formed %XX escape
  filename = URI::DEFAULT_PARSER.unescape(filename) if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }

  # Tag the decoded octets with the declared charset BEFORE scrubbing; scrubbing
  # first would replace bytes that are perfectly valid in that charset.
  filename.force_encoding(encoding) if encoding
  filename.scrub!

  # Undo backslash escapes, unless a backslash precedes something other than \ or ".
  # The patterns are ASCII literals, so they only apply to ASCII-compatible strings.
  if filename.encoding.ascii_compatible? && !/\\[^\\"]/.match?(filename)
    filename = filename.gsub(/\\(.)/, '\1')
  end

  filename
end
if RUBY_VERSION < "2.3"
def to_uri(uri)

View File

@ -68,8 +68,6 @@ module HTTPX
def initialize: (Response response) -> void
def parse: () -> void
def get_filename: (String head) -> String?
end
class FilePart # < SimpleDelegator

View File

@ -64,6 +64,8 @@ module HTTPX
def each: () { (String) -> void } -> void
| () -> Enumerable[String]
def filename: () -> String?
def bytesize: () -> (Integer | Float)
def empty?: () -> bool
def copy_to: (String | File | _Writer destination) -> void

View File

@ -9,5 +9,7 @@ module HTTPX
def self?.elapsed_time: (Integer | Float monotonic_time) -> Float
def self?.to_uri: (generic_uri uri) -> URI::Generic
def self?.get_filename: (String header) -> String?
end
end

View File

@ -138,6 +138,33 @@ class ResponseTest < Minitest::Test
assert body.buffer.is_a?(Tempfile), "body should buffer to file after going over threshold"
end
# Checks Response::Body#filename for a response without a content-disposition
# header, and for plain, quoted and extended (filename*=) filename parameters.
def test_response_body_filename
  build_body = lambda do |headers|
    Response::Body.new(Response.new(request, 200, "2.0", headers), Options.new)
  end

  assert_nil build_body.call({}).filename

  expectations = {
    "attachment;filename=test.csv" => "test.csv",
    "attachment;filename=\"test.csv\"" => "test.csv",
    "inline; filename=ER886357.pdf; " \
    "creation-date=9/17/2012 1:51:37 PM; " \
    "modification-date=9/17/2012 1:51:37 PM; size=3718678" => "ER886357.pdf",
    "attachment; filename*=UTF-8''bar" => "bar",
    "inline; filename*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates.pdf" => "£ and € rates.pdf",
  }

  expectations.each do |disposition, expected|
    body = build_body.call({ "content-disposition" => disposition })
    # assert_equal reports both values on failure, unlike assert(a == b)
    assert_equal expected, body.filename, "unexpected filename for #{disposition.inspect}"
  end
end
def test_response_decoders
json_response = Response.new(request, 200, "2.0", { "content-type" => "application/json" })
json_response << %({"a": "b"})