Merge branch 'master' into add-comment-for-docs

master
Kanezoh 2021-11-16 19:37:53 +09:00
commit fb18b49a3c
25 changed files with 128 additions and 107 deletions

View File

@ -65,4 +65,18 @@ describe "Mechanize Agent test" do
File.exists?("mechanizecr_example.html").should eq true
File.delete("mechanizecr_example.html")
end
# Submitting a form fetched from a page should leave that page's URL
# in the agent's "Referer" request header.
it "should set referer header" do
  form_url = "http://example.com/form"
  mech = Mechanize.new
  # fill in the first form found on the fetched page.
  target_form = mech.get(form_url).forms[0]
  target_form.field_with("name").value = "foo"
  target_form.field_with("email").value = "bar"
  # submit may return nil, so the result is unwrapped before checking the code.
  submitted = mech.submit(target_form)
  submitted.not_nil!.code.should eq 200
  mech.request_headers["Referer"].should eq(form_url)
end
end

View File

@ -27,7 +27,7 @@ describe "Form Fields Select List" do
form.selectboxes.size.should eq 1
end
selectbox = form.selectboxes[0].as(MechanizeCr::FormContent::SelectList)
selectbox = form.selectboxes[0].as(Mechanize::FormContent::SelectList)
it "returns selectbox options size" do
selectbox.options.size.should eq 3

View File

@ -35,7 +35,7 @@ class Mechanize
}
def initialize
@agent = MechanizeCr::HTTP::Agent.new
@agent = Mechanize::HTTP::Agent.new
@agent.context = self
@agent.user_agent = USER_AGENT["Mechanize"]
end
@ -51,8 +51,8 @@ class Mechanize
# headers: HTTP::Headers{"Foo" => "Bar"})
# ```
def get(uri : String | URI,
headers = HTTP::Headers.new,
params : Hash(String, String | Array(String)) = Hash(String, String).new) : MechanizeCr::Page
headers = ::HTTP::Headers.new,
params : Hash(String, String | Array(String)) = Hash(String, String).new) : Mechanize::Page
method = :get
page = @agent.fetch uri, method, headers, params
add_to_history(page)
@ -71,17 +71,17 @@ class Mechanize
# headers: HTTP::Headers{"Foo" => "Bar"})
# ```
def post(uri : String | URI,
headers = HTTP::Headers.new,
query : Hash(String, String | Array(String)) = Hash(String, String).new) : MechanizeCr::Page
headers = ::HTTP::Headers.new,
query : Hash(String, String | Array(String)) = Hash(String, String).new) : Mechanize::Page
node = Node.new
node["method"] = "POST"
node["enctype"] = "application/x-www-form-urlencoded"
form = MechanizeCr::Form.new(node)
form = Mechanize::Form.new(node)
query.each do |k, v|
node = Node.new
node["name"] = k
form.fields << MechanizeCr::FormContent::Field.new(node, v)
form.fields << Mechanize::FormContent::Field.new(node, v)
end
post_form(uri, form, headers)
end
@ -125,18 +125,18 @@ class Mechanize
# get the page mechanize last visited.
#
# ```
# agent.current_page => #<MechanizeCr::Page>
# agent.current_page => #<Mechanize::Page>
# ```
def current_page : MechanizeCr::Page
def current_page : Mechanize::Page
@agent.current_page
end
# get the latest page recorded in history, and the page is deleted from history.
#
# ```
# agent.back => #<MechanizeCr::Page>
# agent.back => #<Mechanize::Page>
# ```
def back : MechanizeCr::Page
def back : Mechanize::Page
@agent.history.pop
end
@ -151,7 +151,7 @@ class Mechanize
# form.field_with("foo").value = "bar"
# agent.submit(form)
# ```
def submit(form, button = nil) : MechanizeCr::Page?
def submit(form, button = nil) : Mechanize::Page?
form.add_button_to_query(button) if button
case form.method.upcase
when "POST"
@ -162,19 +162,19 @@ class Mechanize
# parse response. it is used internally.
def parse(uri, response, body)
code = response.not_nil!.status_code
MechanizeCr::Page.new(uri, response, body, code, self)
Mechanize::Page.new(uri, response, body, code, self)
end
# get the history (`MechanizeCr::History`).
# get the history (`Mechanize::History`).
# the requests mechanize sends are recorded in this history.
# ```
# agent.history => #<MechanizeCr::History>
# agent.history => #<Mechanize::History>
# ```
def history : MechanizeCr::History
def history : Mechanize::History
@agent.history
end
# add page to history (`MechanizeCr::History`).
# add page to history (`Mechanize::History`).
#
# if you send request, mechanize calls this method and records page,
# so you don't need to call this on your own.
@ -207,7 +207,7 @@ class Mechanize
# link = page.links.first
# page2 = agent.click(link)
# ```
def click(link : MechanizeCr::PageContent::Link) : MechanizeCr::Page
def click(link : Mechanize::PageContent::Link) : Mechanize::Page
href = link.href
get href
end
@ -219,13 +219,13 @@ class Mechanize
# ```
def download(uri : URI | String,
filename : String,
headers = HTTP::Headers.new,
headers = ::HTTP::Headers.new,
params : Hash(String, String | Array(String)) = Hash(String, String).new)
transact do
page = get(uri, headers, params)
case page
when MechanizeCr::File
File.write(filename, page.body)
when Mechanize::File
::File.write(filename, page.body)
end
end
end
@ -233,7 +233,7 @@ class Mechanize
# Runs given block, then resets the page history as it was before.
private def transact
# save the previous history status.
history_backup = MechanizeCr::History.new(@agent.history.max_size, @agent.history.array.dup)
history_backup = Mechanize::History.new(@agent.history.max_size, @agent.history.array.dup)
begin
yield self
ensure
@ -243,7 +243,7 @@ class Mechanize
end
# send POST request from form.
private def post_form(uri, form, headers) : MechanizeCr::Page
private def post_form(uri, form, headers) : Mechanize::Page
cur_page = form.page || (current_page unless history.empty?)
request_data = form.request_data

View File

@ -1,3 +1,5 @@
# Base Error class
class MechanizeCr::Error < RuntimeError
class Mechanize
class Error < RuntimeError
end
end

View File

@ -1,6 +1,6 @@
require "./base_error"
class MechanizeCr::ElementNotFoundError < MechanizeCr::Error
class Mechanize::ElementNotFoundError < Mechanize::Error
getter element : Symbol
getter conditions : String

View File

@ -1,12 +1,14 @@
require "http/client"
class MechanizeCr::File
# property :body, :filename
property :body, :code, uri, :response
class Mechanize
class File
# property :body, :filename
property :body, :code, uri, :response
def initialize(uri : URI, response : ::HTTP::Client::Response, body : String, code : Int32)
@uri = uri
@body = body
@code = code
def initialize(uri : URI, response : ::HTTP::Client::Response, body : String, code : Int32)
@uri = uri
@body = body
@code = code
end
end
end

View File

@ -26,21 +26,21 @@ class MechanizeCr::Form
getter enctype : String
getter method : String
getter name : String
getter page : Page?
getter page : Mechanize::Page?
property action : String
def initialize(node : Node | Lexbor::Node, page : Page? = nil)
def initialize(node : Node | Lexbor::Node, page : Mechanize::Page? = nil)
@enctype = node.fetch("enctype", "application/x-www-form-urlencoded")
@node = node
@fields = Array(FormContent::Field).new
@checkboxes = Array(FormContent::CheckBox).new
@radiobuttons = Array(FormContent::RadioButton).new
@selectboxes = Array(FormContent::MultiSelectList).new
@buttons = Array(FormContent::Button).new
@fields = Array(Mechanize::FormContent::Field).new
@checkboxes = Array(Mechanize::FormContent::CheckBox).new
@radiobuttons = Array(Mechanize::FormContent::RadioButton).new
@selectboxes = Array(Mechanize::FormContent::MultiSelectList).new
@buttons = Array(Mechanize::FormContent::Button).new
@action = node.fetch("action", "")
@method = node.fetch("method", "GET").upcase
@name = node.fetch("name", "")
@clicked_buttons = Array(FormContent::Button).new
@clicked_buttons = Array(Mechanize::FormContent::Button).new
@page = page
# @mech = mech
@ -74,25 +74,25 @@ class MechanizeCr::Form
type = (html_node["type"]? || "text").downcase
case type
when "checkbox"
checkboxes << FormContent::CheckBox.new(html_node, self)
checkboxes << Mechanize::FormContent::CheckBox.new(html_node, self)
when "radio"
radiobuttons << FormContent::RadioButton.new(html_node, self)
radiobuttons << Mechanize::FormContent::RadioButton.new(html_node, self)
when "button"
buttons << FormContent::Button.new(html_node, @node)
buttons << Mechanize::FormContent::Button.new(html_node, @node)
when "submit"
buttons << FormContent::SubmitButton.new(html_node, @node)
buttons << Mechanize::FormContent::SubmitButton.new(html_node, @node)
when "reset"
buttons << FormContent::ResetButton.new(html_node, @node)
buttons << Mechanize::FormContent::ResetButton.new(html_node, @node)
when "image"
buttons << FormContent::ImageButton.new(html_node, @node)
buttons << Mechanize::FormContent::ImageButton.new(html_node, @node)
when "text"
fields << FormContent::Text.new(html_node)
fields << Mechanize::FormContent::Text.new(html_node)
when "hidden"
fields << FormContent::Hidden.new(html_node)
fields << Mechanize::FormContent::Hidden.new(html_node)
when "textarea"
fields << FormContent::Textarea.new(html_node)
fields << Mechanize::FormContent::Textarea.new(html_node)
else
fields << FormContent::Field.new(html_node)
fields << Mechanize::FormContent::Field.new(html_node)
end
end
@ -155,7 +155,7 @@ class MechanizeCr::Form
# raise Mechanize::Error,
# "radiobuttons #{values} are checked in the #{name} group, " \
# "only one is allowed"
raise MechanizeCr::Error.new
raise Mechanize::Error.new
else
successful_controls << checked.first unless checked.empty?
end

View File

@ -1,4 +1,4 @@
class MechanizeCr::FormContent::Button < MechanizeCr::FormContent::Field
class Mechanize::FormContent::Button < Mechanize::FormContent::Field
getter form_node : Node | Lexbor::Node
def initialize(node : Node | Lexbor::Node, form_node : Node | Lexbor::Node, value = nil)

View File

@ -1,4 +1,4 @@
class MechanizeCr::FormContent::CheckBox < MechanizeCr::FormContent::RadioButton
class Mechanize::FormContent::CheckBox < Mechanize::FormContent::RadioButton
def check
@checked = true
end

View File

@ -1,4 +1,4 @@
class MechanizeCr::FormContent::Field
class Mechanize::FormContent::Field
property value : String?
getter name : String
getter type : String
@ -29,7 +29,7 @@ class MechanizeCr::FormContent::Field
def inspect # :nodoc:
"[%s:0x%x type: %s name: %s value: %s]" % [
self.class.name.sub(/MechanizeCr::FormContent::/, "").downcase,
self.class.name.sub(/Mechanize::FormContent::/, "").downcase,
object_id, type, name, value,
]
end

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::Hidden < MechanizeCr::FormContent::Field
class Mechanize::FormContent::Hidden < Mechanize::FormContent::Field
end

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::ImageButton < MechanizeCr::FormContent::Button
class Mechanize::FormContent::ImageButton < Mechanize::FormContent::Button
end

View File

@ -1,6 +1,6 @@
require "./option"
class MechanizeCr::FormContent::MultiSelectList
class Mechanize::FormContent::MultiSelectList
getter node : Lexbor::Node
getter name : String
getter type : String
@ -54,7 +54,7 @@ class MechanizeCr::FormContent::MultiSelectList
def inspect # :nodoc:
"[%s:0x%x type: %s name: %s values: [%s]]" % [
self.class.name.sub(/MechanizeCr::FormContent::/, "").downcase,
self.class.name.sub(/Mechanize::FormContent::/, "").downcase,
object_id, type, name, values.join(','),
]
end

View File

@ -1,4 +1,4 @@
class MechanizeCr::FormContent::Option
class Mechanize::FormContent::Option
getter select_list : FormContent::MultiSelectList
getter node : Lexbor::Node
getter text : String

View File

@ -1,4 +1,4 @@
class MechanizeCr::FormContent::RadioButton < MechanizeCr::FormContent::Field
class Mechanize::FormContent::RadioButton < Mechanize::FormContent::Field
property :checked, :form
def initialize(node : Node | Lexbor::Node, form : Form)

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::ResetButton < MechanizeCr::FormContent::Button
class Mechanize::FormContent::ResetButton < Mechanize::FormContent::Button
end

View File

@ -1,6 +1,6 @@
require "./multi_select_list"
class MechanizeCr::FormContent::SelectList < MechanizeCr::FormContent::MultiSelectList
class Mechanize::FormContent::SelectList < Mechanize::FormContent::MultiSelectList
def initialize(node)
super node
# only one selected option is allowed

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::SubmitButton < MechanizeCr::FormContent::Button
class Mechanize::FormContent::SubmitButton < Mechanize::FormContent::Button
end

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::Text < MechanizeCr::FormContent::Field
class Mechanize::FormContent::Text < Mechanize::FormContent::Field
end

View File

@ -1,2 +1,2 @@
class MechanizeCr::FormContent::Textarea < MechanizeCr::FormContent::Field
class Mechanize::FormContent::Textarea < Mechanize::FormContent::Field
end

View File

@ -2,18 +2,18 @@ require "./page"
# This class represents the history of the HTTP requests you sent.
# Whenever you send a request, mechanize saves the resulting page in the history.
class MechanizeCr::History
class Mechanize::History
# maximum number of pages the history can hold. default is 100.
# same as `agent.max_history`.
property max_size : Int32
property array : Array(MechanizeCr::Page)
property array : Array(Mechanize::Page)
delegate :size, :empty?, :last, to: array
delegate :size, :empty?, :last, to: array
def initialize(max_size = 100, array = Array(MechanizeCr::Page).new)
@max_size = max_size
@array = array
end
def initialize(max_size = 100, array = Array(Mechanize::Page).new)
@max_size = max_size
@array = array
end
# add page to history.
def push(page, uri = nil)
@ -21,14 +21,11 @@ class MechanizeCr::History
while size > @max_size
@array.shift
end
self
end
# take the last page out from history.
def pop
if size == 0
# TODO: raise error
end
page = @array.pop
end
end

View File

@ -3,27 +3,28 @@ require "http/client"
require "../cookie"
require "../history"
module MechanizeCr
class Mechanize
module HTTP
class Agent
property :request_headers, :context
property history : MechanizeCr::History
property history : Mechanize::History
property user_agent : String
property request_cookies : ::HTTP::Cookies
def initialize(@context : Mechanize | Nil = nil)
@history = MechanizeCr::History.new
@history = Mechanize::History.new
@request_headers = ::HTTP::Headers.new
@context = context
@request_cookies = ::HTTP::Cookies.new
@user_agent = ""
end
def fetch(uri, method = :get, headers = HTTP::Headers.new, params = Hash(String, String).new,
def fetch(uri, method = :get, headers = ::HTTP::Headers.new, params = Hash(String, String).new,
referer = (current_page unless history.empty?))
uri = resolve_url(uri, referer)
set_request_headers(uri, headers)
set_user_agent
set_request_referer(referer)
uri, params = resolve_parameters(uri, method, params)
response = http_request(uri, method, params)
body = response.not_nil!.body
@ -47,7 +48,7 @@ module MechanizeCr
headers.delete("Content-Type")
headers.delete("Content-Length")
@context.not_nil!.get(uri)
fetch(uri)
end
def http_request(uri, method, params)
@ -94,6 +95,13 @@ module MechanizeCr
end
end
# Stores the URI of the referring page in the "Referer" request header.
# Leaves the request headers untouched when no referer page is given.
def set_request_referer(referer : Mechanize::Page?)
  if referer
    request_headers["Referer"] = referer.uri.to_s
  end
end
private def resolve_parameters(uri, method, params)
case method
when :get

View File

@ -3,26 +3,22 @@ require "./utils/element_matcher"
require "./page/link"
# This class represents the result of http response.
# If you send a request, it returns the instance of `MechanizeCr::Page`.
# If you send a request, it returns the instance of `Mechanize::Page`.
# From a page instance you can get the status code, title, and body, and search HTML nodes using CSS selectors.
class MechanizeCr::Page < MechanizeCr::File
include MechanizeCr::ElementMatcher
class Mechanize::Page < Mechanize::File
include Mechanize::ElementMatcher
# look at lexbor document.(https://github.com/kostya/lexbor#readme)
delegate :css, to: parser
property mech : Mechanize
def initialize(uri, response, body, code, mech)
@mech = mech
super(uri, response, body, code)
end
property mech : Mechanize
# Returns the parser for the response body, building it from @body on
# first use and reusing the stored instance afterwards.
# TODO: currently always a Lexbor::Parser; other parsers (e.g. JSON) are not supported yet.
def parser : Lexbor::Parser
  @parser || (@parser = Lexbor::Parser.new(@body))
end
def initialize(uri, response, body, code, mech)
@mech = mech
super(uri, response, body, code)
end
# return page title.
# ```
@ -35,13 +31,12 @@ class MechanizeCr::Page < MechanizeCr::File
else
title_node.first.inner_text
end
end
# return all forms(`MechanizeCr::Form`) in the page.
# return all forms(`Mechanize::Form`) in the page.
# ```
# page.forms # => Array(MechanizeCr::Form)
# page.forms # => Array(Mechanize::Form)
# ```
def forms : Array(MechanizeCr::Form)
def forms : Array(Mechanize::Form)
forms = css("form").map do |html_form|
form = Form.new(html_form, self)
form.action ||= @uri.to_s
@ -49,11 +44,11 @@ class MechanizeCr::Page < MechanizeCr::File
end.to_a
end
# return all links(`MechanizeCr::PageContent::Link`) in the page.
# return all links(`Mechanize::PageContent::Link`) in the page.
# ```
# page.links # => Array(MechanizeCr::PageContent::Link)
# page.links # => Array(Mechanize::PageContent::Link)
# ```
def links : Array(MechanizeCr::PageContent::Link)
def links : Array(Mechanize::PageContent::Link)
links = %w{a area}.map do |tag|
css(tag).map do |node|
PageContent::Link.new(node, @mech, self)
@ -61,5 +56,6 @@ class MechanizeCr::Page < MechanizeCr::File
end.flatten
end
elements_with "form"
elements_with "form"
end
end

View File

@ -1,6 +1,6 @@
class MechanizeCr::PageContent::Link
class Mechanize::PageContent::Link
getter node : Lexbor::Node
getter page : Page
getter page : Mechanize::Page
getter mech : Mechanize
getter href : String
getter text : String

View File

@ -1,5 +1,6 @@
module MechanizeCr::ElementMatcher
macro elements_with(singular, plural = "")
class Mechanize
module ElementMatcher
macro elements_with(singular, plural = "")
{% plural = "#{singular.id}s" if plural.empty? %}
# search {{ plural.id }} which match condition.
#
@ -62,8 +63,9 @@ module MechanizeCr::ElementMatcher
def {{singular.id}}_with(criteria)
f = {{plural.id}}_with(criteria)
# TODO: Write correct error message.
raise ElementNotFoundError.new(:{{singular.id}}, "") if f.empty?
raise Mechanize::ElementNotFoundError.new(:{{singular.id}}, "") if f.empty?
f.first
end
end
end
end