Compare commits

..

No commits in common. "d4f7054a88ac56310381235d398960cd7ee3c2c7" and "64bccad4954253d0b4c35bfd70dd3e707fbfada9" have entirely different histories.

15 changed files with 32 additions and 95 deletions

View File

@ -4,8 +4,7 @@
This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize). This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize).
The purpose is to cover all the features of original one. The purpose is to cover all the features of original one.
Currently, mechanize.cr can automatically store and send cookies, and fill in and submit forms.
[API Documentation](https://kanezoh.github.io/mechanize.cr/)
## Installation ## Installation
@ -21,7 +20,7 @@ The purpose is to cover all the features of original one.
## Usage ## Usage
### GET request ### simple GET request
```crystal ```crystal
require "mechanize" require "mechanize"
@ -96,17 +95,6 @@ For activation, simply setup the log to `:debug` level
Log.setup("mechanize", :debug) Log.setup("mechanize", :debug)
``` ```
### Basic auth
You can access a page protected by basic auth by registering a username and password for its URL.
```crystal
agent = Mechanize.new
agent.add_auth("http://example.com", "username", "password")
agent.get("http://example.com")
```
## Contributing ## Contributing
1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>) 1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>)

13
main.cr Normal file
View File

@ -0,0 +1,13 @@
require "./src/mechanize.cr"
agent = Mechanize.new
agent.request_headers = HTTP::Headers{"Foo" => "Bar"}
params = {"hoge" => "hoge"}
page = agent.get("http://example.com/", params: params)
# form = page.forms[0]
# query = {"foo" => "foo_value", "bar" => "bar_value"}
# page = agent.post("http://example.com/", query: query)
# puts page.code
# puts page.body
# puts page.css("h1").first.inner_text
# puts page.title

View File

@ -11,8 +11,6 @@ license: MIT
dependencies: dependencies:
lexbor: lexbor:
github: kostya/lexbor github: kostya/lexbor
http_proxy:
github: mamantoha/http_proxy
development_dependencies: development_dependencies:
webmock: webmock:
@ -20,6 +18,6 @@ development_dependencies:
branch: master branch: master
kemal: kemal:
github: kemalcr/kemal github: kemalcr/kemal
version: ~> 1.3.0 version: ~> 1.0.0
kemal-basic-auth: kemal-basic-auth:
github: kemalcr/kemal-basic-auth github: kemalcr/kemal-basic-auth

View File

@ -1,6 +1,5 @@
require "./spec_helper" require "./spec_helper"
WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2") WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2")
WebMock.stub(:get, "http://example.com/path?foo=bar&foo1=bar2")
WebMock.stub(:post, "http://example.com/post") WebMock.stub(:post, "http://example.com/post")
.with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"}) .with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success") .to_return(body: "success")
@ -41,14 +40,6 @@ describe "Mechanize HTTP test" do
page.uri.to_s.should eq uri page.uri.to_s.should eq uri
end end
it "GET with query parameter with path" do
agent = Mechanize.new
uri = "http://example.com/path?foo=bar&foo1=bar2"
page = agent.get(uri)
page.code.should eq 200
page.uri.to_s.should eq uri
end
it "simple POST" do it "simple POST" do
agent = Mechanize.new agent = Mechanize.new
query = {"email" => "foobar"} query = {"email" => "foobar"}

View File

@ -1,17 +0,0 @@
require "./spec_helper"
WebMock.stub(:get, "http://example.com/with_proxy").to_return(body: "success")
describe "Mechanize proxy test" do
it "set proxy" do
with_proxy_server do |host, port, wants_close|
agent = Mechanize.new
agent.set_proxy("127.0.0.1", 8080)
page = agent.get("http://example.com/with_proxy")
page.body.should eq("success")
page.code.should eq(200)
ensure
wants_close.send(nil)
end
end
end

View File

@ -1,6 +1,5 @@
require "spec" require "spec"
require "webmock" require "webmock"
require "http_proxy"
require "../src/mechanize" require "../src/mechanize"
WebMock.stub(:get, "example.com") WebMock.stub(:get, "example.com")
@ -47,22 +46,3 @@ WebMock.stub(:post, "example.com/post_path")
WebMock.stub(:post, "example.com/post_path") WebMock.stub(:post, "example.com/post_path")
.with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"}) .with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success with button") .to_return(body: "success with button")
def with_proxy_server(host = "127.0.0.1", port = 8080, &)
wants_close = Channel(Nil).new
server = HTTP::Proxy::Server.new
spawn do
server.bind_tcp(host, port)
server.listen
end
spawn do
wants_close.receive
server.close
end
Fiber.yield
yield host, port, wants_close
end

View File

@ -2,7 +2,6 @@ require "log"
require "uri" require "uri"
require "http/client" require "http/client"
require "lexbor" require "lexbor"
require "http_proxy"
require "./mechanize/http/agent" require "./mechanize/http/agent"
require "./mechanize/form" require "./mechanize/form"
require "./mechanize/node" require "./mechanize/node"
@ -299,18 +298,13 @@ class Mechanize
# set basic auth credentials. # set basic auth credentials.
# ``` # ```
# # set an auth credential with a specific url. # # make download.html whose content is http://example.com's html.
# agent.add_auth("http://example.com", "username", "password") # agent.add_auth("http://example.com", "username", "password")
# ``` # ```
def add_auth(uri : String, user : String, pass : String) def add_auth(uri : String, user : String, pass : String)
@agent.add_auth(uri, user, pass) @agent.add_auth(uri, user, pass)
end end
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@agent.set_proxy(address, port, user, password)
end
# Runs given block, then resets the page history as it was before. # Runs given block, then resets the page history as it was before.
private def transact private def transact
# save the previous history status. # save the previous history status.

View File

@ -193,6 +193,7 @@ class Mechanize
# submitted with multiple buttons, pass each button to this method. # submitted with multiple buttons, pass each button to this method.
def add_button_to_query(button) def add_button_to_query(button)
unless button.form_node == @node unless button.form_node == @node
message = ""
"#{button.inspect} does not belong to the same page as " \ "#{button.inspect} does not belong to the same page as " \
"the form #{@name.inspect} in #{@page.try &.uri}" "the form #{@name.inspect} in #{@page.try &.uri}"
message = "not a valid button" message = "not a valid button"

View File

@ -23,14 +23,10 @@ class Mechanize::FormContent::SelectList < Mechanize::FormContent::MultiSelectLi
end end
def value=(new_value) def value=(new_value)
@values = new_value values = new_value
end end
def query_value def query_value
if v = value value ? [[name, value.not_nil!]] : nil
[[name, v]]
else
nil
end
end end
end end

View File

@ -30,7 +30,7 @@ class Mechanize
if size == 0 if size == 0
# TODO: raise error # TODO: raise error
end end
@array.pop page = @array.pop
end end
end end
end end

View File

@ -16,8 +16,6 @@ class Mechanize
getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm))) getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm)))
getter authenticate_parser : WWWAuthenticateParser getter authenticate_parser : WWWAuthenticateParser
@proxy : ::HTTP::Proxy::Client?
def initialize(@context : Mechanize? = nil) def initialize(@context : Mechanize? = nil)
@history = History.new @history = History.new
@request_headers = ::HTTP::Headers.new @request_headers = ::HTTP::Headers.new
@ -45,7 +43,6 @@ class Mechanize
uri, params = resolve_parameters(uri, method, params) uri, params = resolve_parameters(uri, method, params)
client = ::HTTP::Client.new(uri) client = ::HTTP::Client.new(uri)
request_auth client, uri request_auth client, uri
client.set_proxy(@proxy) if @proxy
response = http_request(client, uri, method, params, body) response = http_request(client, uri, method, params, body)
body = response.not_nil!.body body = response.not_nil!.body
page = response_parse(response, body, uri) page = response_parse(response, body, uri)
@ -69,7 +66,7 @@ class Mechanize
redirect_url = response.headers["location"] redirect_url = response.headers["location"]
uri = resolve_url(redirect_url, referer) uri = resolve_url(redirect_url, referer)
Log.debug { "follow redirect to: #{uri}" } Log.debug { "follow redirect to: #{uri.to_s}" }
# Make sure we are not copying over the POST headers from the original request # Make sure we are not copying over the POST headers from the original request
headers.delete("Content-MD5") headers.delete("Content-MD5")
@ -87,7 +84,6 @@ class Mechanize
body : String?) : ::HTTP::Client::Response? body : String?) : ::HTTP::Client::Response?
request_log(uri, method) request_log(uri, method)
path = uri.path path = uri.path
path += "?#{uri.query.not_nil!}" if uri.query
case uri.scheme.not_nil!.downcase case uri.scheme.not_nil!.downcase
when "http", "https" when "http", "https"
@ -166,17 +162,13 @@ class Mechanize
# set basic auth credentials. # set basic auth credentials.
# ``` # ```
# # set an auth credential with a specific url. # # make download.html whose content is http://example.com's html.
# agent.add_auth("http://example.com", "username", "password") # agent.add_auth("http://example.com", "username", "password")
# ``` # ```
def add_auth(uri : String, user : String, pass : String) def add_auth(uri : String, user : String, pass : String)
@auth_store.add_auth(uri, user, pass) @auth_store.add_auth(uri, user, pass)
end end
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@proxy = ::HTTP::Proxy::Client.new(address, port, username: user, password: password)
end
private def set_request_headers(uri, headers) private def set_request_headers(uri, headers)
reset_request_header_cookies reset_request_header_cookies
headers.each do |k, v| headers.each do |k, v|
@ -325,6 +317,7 @@ class Mechanize
# extract valid cookies according to URI # extract valid cookies according to URI
private def valid_cookies(uri) private def valid_cookies(uri)
host = uri.host
valid_cookies = ::HTTP::Cookies.new valid_cookies = ::HTTP::Cookies.new
request_cookies.each do |cookie| request_cookies.each do |cookie|
valid_cookies << cookie if cookie.valid_cookie?(uri) valid_cookies << cookie if cookie.valid_cookie?(uri)
@ -333,7 +326,7 @@ class Mechanize
end end
private def request_log(uri, method) private def request_log(uri, method)
Log.debug { "#{method.to_s.upcase}: #{uri}" } Log.debug { "#{method.to_s.upcase}: #{uri.to_s}" }
request_headers.each do |key, values| request_headers.each do |key, values|
value = values.size == 1 ? values.first : values value = values.size == 1 ? values.first : values

View File

@ -8,7 +8,6 @@ class Mechanize
@auth_accounts = Hash(URI, Hash(String, Array(String))).new @auth_accounts = Hash(URI, Hash(String, Array(String))).new
end end
# set an auth credential with a specific url.
def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil) def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil)
target_uri = uri.dup target_uri = uri.dup
unless uri.is_a?(URI) unless uri.is_a?(URI)
@ -29,6 +28,7 @@ class Mechanize
end end
end end
##
# Returns true if credentials exist for the +challenges+ from the server at # Returns true if credentials exist for the +challenges+ from the server at
# +uri+. # +uri+.

View File

@ -127,7 +127,7 @@ class Mechanize
return nil unless value return nil unless value
[name, value] return [name, value]
end end
## ##

View File

@ -43,11 +43,11 @@ class Mechanize
# page.forms # => Array(Mechanize::Form) # page.forms # => Array(Mechanize::Form)
# ``` # ```
def forms : Array(Form) def forms : Array(Form)
css("form").map do |html_form| forms = css("form").map do |html_form|
form = Form.new(html_form, self) form = Form.new(html_form, self)
form.action ||= @uri.to_s form.action ||= @uri.to_s
form form
end end.to_a
end end
# return all links(`Mechanize::PageContent::Link`) in the page. # return all links(`Mechanize::PageContent::Link`) in the page.
@ -55,11 +55,11 @@ class Mechanize
# page.links # => Array(Mechanize::PageContent::Link) # page.links # => Array(Mechanize::PageContent::Link)
# ``` # ```
def links : Array(PageContent::Link) def links : Array(PageContent::Link)
%w{a area}.flat_map do |tag| links = %w{a area}.map do |tag|
css(tag).map do |node| css(tag).map do |node|
PageContent::Link.new(node, @mech, self) PageContent::Link.new(node, @mech, self)
end end
end end.flatten
end end
elements_with "form" elements_with "form"