Compare commits

...

11 Commits

Author SHA1 Message Date
Kanezoh
d4f7054a88
Merge pull request #27 from mamantoha/set-proxy
add proxy support
2024-04-27 08:00:58 +09:00
Anton Maminov
64da39eb9b add proxy support 2023-01-08 19:45:22 +02:00
Kanezoh
48001b0404
Merge pull request #26 from mamantoha/develop
bump kemal
2022-11-07 14:28:23 +09:00
Anton Maminov
c783485d9a cosmetic changes 2022-11-04 15:19:48 +02:00
Anton Maminov
cc6f42b796 fix redundant use of Object#to_s in interpolation 2022-11-04 14:34:42 +02:00
Anton Maminov
230deb68dc bump kemal 2022-11-04 14:24:40 +02:00
Kanezoh
be4d845921
Update README.md 2022-01-24 21:25:58 +09:00
Kanezoh
e72fae8516 improve comment 2022-01-15 12:32:31 +09:00
Kanezoh
cb3b03873a delete an unnecessary file 2022-01-15 12:32:21 +09:00
Kanezoh
f44ed2eea6
Merge pull request #24 from Kanezoh/fix-query-parameter-with-path
fix unexpected behaviour when get uri with path and query parameter
2022-01-10 18:56:25 +09:00
Kanezoh
2c73aa2bcf fix unexpected behaviour when get uri with path and query parameter 2022-01-10 10:44:27 +09:00
15 changed files with 95 additions and 32 deletions

View File

@ -4,7 +4,8 @@
This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize).
The goal is to cover all the features of the original.
Currently, mechanize.cr can automatically store and send cookies, and fill in and submit forms.
[API Documentation](https://kanezoh.github.io/mechanize.cr/)
## Installation
@ -20,7 +21,7 @@ Now, mechanize.cr can automatically store and send cookies, fill and submit form
## Usage
### simple GET request
### GET request
```crystal
require "mechanize"
@ -95,6 +96,17 @@ For activation, simply setup the log to `:debug` level
Log.setup("mechanize", :debug)
```
### Basic auth
You can access a page protected by basic auth by setting a username and password for its URL.
```crystal
agent = Mechanize.new
agent.add_auth("http://example.com", "username", "password")
agent.get("http://example.com")
```
## Contributing
1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>)

13
main.cr
View File

@ -1,13 +0,0 @@
require "./src/mechanize.cr"
# Scratch script: send a GET request carrying a custom header and one query
# parameter through a fresh Mechanize agent.
agent = Mechanize.new
agent.request_headers = HTTP::Headers{"Foo" => "Bar"}
page = agent.get("http://example.com/", params: {"hoge" => "hoge"})

# Disabled experiments (form access, POST with a query, inspecting the page):
# form = page.forms[0]
# query = {"foo" => "foo_value", "bar" => "bar_value"}
# page = agent.post("http://example.com/", query: query)
# puts page.code
# puts page.body
# puts page.css("h1").first.inner_text
# puts page.title

View File

@ -11,6 +11,8 @@ license: MIT
dependencies:
lexbor:
github: kostya/lexbor
http_proxy:
github: mamantoha/http_proxy
development_dependencies:
webmock:
@ -18,6 +20,6 @@ development_dependencies:
branch: master
kemal:
github: kemalcr/kemal
version: ~> 1.0.0
version: ~> 1.3.0
kemal-basic-auth:
github: kemalcr/kemal-basic-auth

View File

@ -1,5 +1,6 @@
require "./spec_helper"
WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2")
WebMock.stub(:get, "http://example.com/path?foo=bar&foo1=bar2")
WebMock.stub(:post, "http://example.com/post")
.with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success")
@ -40,6 +41,14 @@ describe "Mechanize HTTP test" do
page.uri.to_s.should eq uri
end
it "GET with query parameter with path" do
agent = Mechanize.new
uri = "http://example.com/path?foo=bar&foo1=bar2"
page = agent.get(uri)
page.code.should eq 200
page.uri.to_s.should eq uri
end
it "simple POST" do
agent = Mechanize.new
query = {"email" => "foobar"}

17
spec/proxy_spec.cr Normal file
View File

@ -0,0 +1,17 @@
require "./spec_helper"
WebMock.stub(:get, "http://example.com/with_proxy").to_return(body: "success")
# Checks that a GET request still succeeds after the agent has been configured
# to use a proxy started by `with_proxy_server`.
describe "Mechanize proxy test" do
  it "set proxy" do
    with_proxy_server do |host, port, wants_close|
      agent = Mechanize.new
      # Use the address yielded by the helper rather than repeating its
      # defaults, so the spec keeps pointing at the server that was started.
      agent.set_proxy(host, port)
      page = agent.get("http://example.com/with_proxy")
      page.body.should eq("success")
      page.code.should eq(200)
    ensure
      # Signal the helper to close the proxy server, even when an
      # expectation above fails.
      wants_close.send(nil)
    end
  end
end

View File

@ -1,5 +1,6 @@
require "spec"
require "webmock"
require "http_proxy"
require "../src/mechanize"
WebMock.stub(:get, "example.com")
@ -46,3 +47,22 @@ WebMock.stub(:post, "example.com/post_path")
WebMock.stub(:post, "example.com/post_path")
.with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success with button")
# Starts an `HTTP::Proxy::Server` on *host*:*port* and yields `host`, `port`
# and a `wants_close` channel to the block.
#
# The server is not stopped automatically: the caller must send a value on
# `wants_close` (for example from an `ensure` clause) to have it closed.
def with_proxy_server(host = "127.0.0.1", port = 8080, &)
  wants_close = Channel(Nil).new
  server = HTTP::Proxy::Server.new

  # Bind in the calling fiber so that a bind failure (such as the port already
  # being in use) raises here instead of inside a background fiber.
  server.bind_tcp(host, port)

  # Accept connections in the background.
  spawn do
    server.listen
  end

  # Close the server once the caller signals that it is done.
  spawn do
    wants_close.receive
    server.close
  end

  # Give the spawned fibers a chance to start before running the block.
  Fiber.yield

  yield host, port, wants_close
end

View File

@ -2,6 +2,7 @@ require "log"
require "uri"
require "http/client"
require "lexbor"
require "http_proxy"
require "./mechanize/http/agent"
require "./mechanize/form"
require "./mechanize/node"
@ -298,13 +299,18 @@ class Mechanize
# set basic auth credentials.
# ```
# # make download.html whose content is http://example.com's html.
# # set an auth credential with a specific url.
# agent.add_auth("http://example.com", "username", "password")
# ```
def add_auth(uri : String, user : String, pass : String)
@agent.add_auth(uri, user, pass)
end
# Sets the proxy at *address* and *port*, with an optional *user* and *password*.
# All arguments are forwarded unchanged to the underlying agent.
# ```
# agent.set_proxy("127.0.0.1", 8080)
# ```
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@agent.set_proxy(address, port, user, password)
end
# Runs given block, then resets the page history as it was before.
private def transact
# save the previous history status.

View File

@ -193,7 +193,6 @@ class Mechanize
# submitted with multiple buttons, pass each button to this method.
def add_button_to_query(button)
unless button.form_node == @node
message = ""
"#{button.inspect} does not belong to the same page as " \
"the form #{@name.inspect} in #{@page.try &.uri}"
message = "not a valid button"

View File

@ -23,10 +23,14 @@ class Mechanize::FormContent::SelectList < Mechanize::FormContent::MultiSelectLi
end
def value=(new_value)
values = new_value
@values = new_value
end
def query_value
value ? [[name, value.not_nil!]] : nil
if v = value
[[name, v]]
else
nil
end
end
end

View File

@ -30,7 +30,7 @@ class Mechanize
if size == 0
# TODO: raise error
end
page = @array.pop
@array.pop
end
end
end

View File

@ -16,6 +16,8 @@ class Mechanize
getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm)))
getter authenticate_parser : WWWAuthenticateParser
@proxy : ::HTTP::Proxy::Client?
def initialize(@context : Mechanize? = nil)
@history = History.new
@request_headers = ::HTTP::Headers.new
@ -43,6 +45,7 @@ class Mechanize
uri, params = resolve_parameters(uri, method, params)
client = ::HTTP::Client.new(uri)
request_auth client, uri
client.set_proxy(@proxy) if @proxy
response = http_request(client, uri, method, params, body)
body = response.not_nil!.body
page = response_parse(response, body, uri)
@ -66,7 +69,7 @@ class Mechanize
redirect_url = response.headers["location"]
uri = resolve_url(redirect_url, referer)
Log.debug { "follow redirect to: #{uri.to_s}" }
Log.debug { "follow redirect to: #{uri}" }
# Make sure we are not copying over the POST headers from the original request
headers.delete("Content-MD5")
@ -84,6 +87,7 @@ class Mechanize
body : String?) : ::HTTP::Client::Response?
request_log(uri, method)
path = uri.path
path += "?#{uri.query.not_nil!}" if uri.query
case uri.scheme.not_nil!.downcase
when "http", "https"
@ -162,13 +166,17 @@ class Mechanize
# set basic auth credentials.
# ```
# # make download.html whose content is http://example.com's html.
# # set an auth credential with a specific url.
# agent.add_auth("http://example.com", "username", "password")
# ```
def add_auth(uri : String, user : String, pass : String)
@auth_store.add_auth(uri, user, pass)
end
# Builds a `::HTTP::Proxy::Client` for *address* and *port*, passing the
# optional *user* and *password* as its credentials, and stores it in `@proxy`.
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@proxy = ::HTTP::Proxy::Client.new(address, port, username: user, password: password)
end
private def set_request_headers(uri, headers)
reset_request_header_cookies
headers.each do |k, v|
@ -317,7 +325,6 @@ class Mechanize
# extract valid cookies according to URI
private def valid_cookies(uri)
host = uri.host
valid_cookies = ::HTTP::Cookies.new
request_cookies.each do |cookie|
valid_cookies << cookie if cookie.valid_cookie?(uri)
@ -326,7 +333,7 @@ class Mechanize
end
private def request_log(uri, method)
Log.debug { "#{method.to_s.upcase}: #{uri.to_s}" }
Log.debug { "#{method.to_s.upcase}: #{uri}" }
request_headers.each do |key, values|
value = values.size == 1 ? values.first : values

View File

@ -8,6 +8,7 @@ class Mechanize
@auth_accounts = Hash(URI, Hash(String, Array(String))).new
end
# set an auth credential with a specific url.
def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil)
target_uri = uri.dup
unless uri.is_a?(URI)
@ -28,7 +29,6 @@ class Mechanize
end
end
##
# Returns true if credentials exist for the +challenges+ from the server at
# +uri+.

View File

@ -127,7 +127,7 @@ class Mechanize
return nil unless value
return [name, value]
[name, value]
end
##

View File

@ -43,11 +43,11 @@ class Mechanize
# page.forms # => Array(Mechanize::Form)
# ```
def forms : Array(Form)
forms = css("form").map do |html_form|
css("form").map do |html_form|
form = Form.new(html_form, self)
form.action ||= @uri.to_s
form
end.to_a
end
end
# return all links(`Mechanize::PageContent::Link`) in the page.
@ -55,11 +55,11 @@ class Mechanize
# page.links # => Array(Mechanize::PageContent::Link)
# ```
def links : Array(PageContent::Link)
links = %w{a area}.map do |tag|
%w{a area}.flat_map do |tag|
css(tag).map do |node|
PageContent::Link.new(node, @mech, self)
end
end.flatten
end
end
elements_with "form"