Skip to content

Commit 612e532

Browse files
committed
update
1 parent bc975e9 commit 612e532

26 files changed

+145
-132
lines changed

CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
## 3.3.0
2+
* update liblexbor to latest
3+
* allow using `Lexbor.new` instead of `Lexbor::Parser.new`
4+
15
## 3.2.0
2-
* updat liblexbor to latest
6+
* update liblexbor to latest
37

48
## 3.1.3
59
* fix usage in interpreter

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ html = <<-HTML
3434
</html>
3535
HTML
3636

37-
lexbor = Lexbor::Parser.new(html)
37+
lexbor = Lexbor.new(html)
3838

3939
lexbor.nodes(:div).each do |node|
4040
id = node["id"]?
@@ -76,7 +76,7 @@ html = <<-HTML
7676
</html>
7777
HTML
7878
79-
lexbor = Lexbor::Parser.new(html)
79+
lexbor = Lexbor.new(html)
8080
8181
p lexbor.css("#t2 tr td:first-child").map(&.inner_text).to_a
8282
# => ["123", "foo", "bar", "xyz"]

examples/basic1.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ html = <<-HTML
1313
</html>
1414
HTML
1515

16-
lexbor = Lexbor::Parser.new(html)
16+
lexbor = Lexbor.new(html)
1717

1818
lexbor.nodes(:div).each do |node|
1919
id = node["id"]?

examples/create_html.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
require "../src/lexbor"
44

5-
doc = Lexbor::Parser.new ""
5+
doc = Lexbor.new ""
66
body = doc.body!
77

88
div = doc.create_node(:div)

examples/css_selectors1.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ html = <<-HTML
1616
</html>
1717
HTML
1818

19-
lexbor = Lexbor::Parser.new(html)
19+
lexbor = Lexbor.new(html)
2020

2121
p lexbor.css("#t2 tr td:first-child").map(&.inner_text).to_a
2222
# => ["123", "foo", "bar", "xyz"]

examples/css_selectors2.cr

+10-10
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,30 @@ html = <<-PAGE
1515
</div>
1616
PAGE
1717

18-
parser = Lexbor::Parser.new(html)
18+
lexbor = Lexbor.new(html)
1919

2020
# select all p nodes which id like `*p*`
21-
p parser.css("p[id*=p]").map(&.attribute_by("id")).to_a # => ["p1", "p2", "p3", "p4", "p5", "p6"]
21+
p lexbor.css("p[id*=p]").map(&.attribute_by("id")).to_a # => ["p1", "p2", "p3", "p4", "p5", "p6"]
2222

2323
# select all nodes with class "jo"
24-
p parser.css("p.jo").map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]
25-
p parser.css(".jo").map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]
24+
p lexbor.css("p.jo").map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]
25+
p lexbor.css(".jo").map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]
2626

2727
# select odd child tags inside div which do not contain an `a` tag
28-
p parser.css("div > :nth-child(2n+1):not(:has(a))").map(&.attribute_by("id")).to_a # => ["p1", "p4", "p6"]
28+
p lexbor.css("div > :nth-child(2n+1):not(:has(a))").map(&.attribute_by("id")).to_a # => ["p1", "p4", "p6"]
2929

3030
# all elements with class=jo inside last div tag
31-
p parser.css("div").to_a.last.css(".jo").map(&.attribute_by("id")).to_a # => ["p4", "p6"]
31+
p lexbor.css("div").to_a.last.css(".jo").map(&.attribute_by("id")).to_a # => ["p4", "p6"]
3232

3333
# a element with href ends like .png
34-
p parser.css(%q{a[href$=".png"]}).map(&.attribute_by("id")).to_a # => ["a2"]
34+
p lexbor.css(%q{a[href$=".png"]}).map(&.attribute_by("id")).to_a # => ["a2"]
3535

3636
# find all a tags inside <p id=p3> whose href contains `html`
37-
p parser.css(%q{p[id=p3] > a[href*="html"]}).map(&.attribute_by("id")).to_a # => ["a1"]
37+
p lexbor.css(%q{p[id=p3] > a[href*="html"]}).map(&.attribute_by("id")).to_a # => ["a1"]
3838

3939
# find all a tags inside <p id=p3> whose href contains `html` or ends with `.png`
40-
p parser.css(%q{p[id=p3] > a:is([href *= "html"], [href $= ".png"])}).map(&.attribute_by("id")).to_a # => ["a1", "a2"]
40+
p lexbor.css(%q{p[id=p3] > a:is([href *= "html"], [href $= ".png"])}).map(&.attribute_by("id")).to_a # => ["a1", "a2"]
4141

4242
# create a filter once and reuse it in many places; this is faster than creating it many times
4343
filter = Lexbor::CssFilter.new(".jo")
44-
p parser.css(filter).map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]
44+
p lexbor.css(filter).map(&.attribute_by("id")).to_a # => ["p2", "p4", "p6"]

examples/links.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def good_texts_iterator(iterator)
3535
.reject(&.empty?)
3636
end
3737

38-
Lexbor::Parser.new(str).nodes(:a).each do |node|
38+
Lexbor.new(str).nodes(:a).each do |node|
3939
anchor = node.inner_text(deep: true)
4040
href = node.attribute_by("href")
4141
before = good_texts_iterator(node.left_iterator).first?

examples/print_html.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ formatting = (ARGV[1]? != "0")
2626
remove_whitespaces = (ARGV[2]? != "0")
2727
remove_comments = (ARGV[3]? != "0")
2828

29-
lexbor = Lexbor::Parser.new(str)
29+
lexbor = Lexbor.new(str)
3030

3131
if remove_comments
3232
nodes = lexbor.nodes(:_em_comment).to_a # important to materialize array with to_a, before removing

examples/print_tree.cr

+1-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ str = if filename = ARGV[0]?
1313
"<html><Div><span class='test'>HTML</span></div></html>"
1414
end
1515

16-
parser = Lexbor::Parser.new(str)
17-
walk(parser.root!)
16+
walk(Lexbor.new(str).root!)
1817

1918
# Output:
2019
# Lexbor::Node(:html)

examples/texts.cr

+3-4
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,16 @@ struct Lexbor::Node
3030
end
3131
end
3232

33-
def words(parser)
34-
parser
33+
def words(doc)
34+
doc
3535
.nodes(:_text) # iterate through all TEXT nodes
3636
.select(&.parents.all?(&.displayble?)) # select only which parents are visible good tag
3737
.map(&.tag_text) # mapping node text
3838
.reject(&.blank?) # reject blanked texts
3939
.map(&.strip.gsub(/\s{2,}/, " ")) # remove extra spaces
4040
end
4141

42-
parser = Lexbor::Parser.new(str)
43-
puts words(parser).join(" | ")
42+
puts words(Lexbor.new(str)).join(" | ")
4443

4544
# Output:
4645
# Название: | Что я сделал? | Ответил: | Чудище-Змей | на | 21 Октябрь 2005, 18:11 |

examples/usage.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ require "../src/lexbor"
55
puts Lexbor.version
66

77
page = "<html><div class=aaa>bla</div></html>"
8-
lexbor = Lexbor::Parser.new(page)
8+
lexbor = Lexbor.new(page)
99

1010
# html node
1111
lexbor.root # (.html) Lexbor::Node?

spec/css_selectors_spec.cr

+14-14
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ describe Lexbor do
55
html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5><p id=p6></div>"
66
selector = "div > :nth-child(2n+1):not(:has(a))"
77

8-
parser = Lexbor::Parser.new(html)
8+
parser = Lexbor.new(html)
99
finder = Lexbor::CssFilter.new(selector)
1010
nodes = finder.search_from(parser.html!).to_a
1111

@@ -23,7 +23,7 @@ describe Lexbor do
2323
it "css for root! node" do
2424
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
2525

26-
parser = Lexbor::Parser.new(html)
26+
parser = Lexbor.new(html)
2727
nodes = parser.root!.css("div > :nth-child(2n+1):not(:has(a))").to_a
2828

2929
nodes.size.should eq 2
@@ -40,21 +40,21 @@ describe Lexbor do
4040
it "another rule" do
4141
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
4242

43-
parser = Lexbor::Parser.new(html)
43+
parser = Lexbor.new(html)
4444
parser.root!.css(".jo").to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
4545
end
4646

4747
it "another rule for parser itself" do
4848
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
4949

50-
parser = Lexbor::Parser.new(html)
50+
parser = Lexbor.new(html)
5151
parser.css(".jo").to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
5252
end
5353

5454
it "work for another scope node" do
5555
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><div id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></div></div>"
5656

57-
parser = Lexbor::Parser.new(html)
57+
parser = Lexbor.new(html)
5858
parser.nodes(:div).to_a.last.css(".jo").to_a.map(&.attribute_by("id")).should eq %w(p4 p6)
5959
parser.nodes(:div).to_a.first.css(".jo").to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
6060
end
@@ -63,7 +63,7 @@ describe Lexbor do
6363
it "for parser" do
6464
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
6565

66-
parser = Lexbor::Parser.new(html)
66+
parser = Lexbor.new(html)
6767
finder = Lexbor::CssFilter.new(".jo")
6868

6969
10.times do
@@ -76,7 +76,7 @@ describe Lexbor do
7676
it "for parser" do
7777
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
7878

79-
parser = Lexbor::Parser.new(html)
79+
parser = Lexbor.new(html)
8080
finder = Lexbor::CssFilter.new(".jo")
8181

8282
10.times do
@@ -87,7 +87,7 @@ describe Lexbor do
8787
it "for root node" do
8888
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
8989

90-
parser = Lexbor::Parser.new(html)
90+
parser = Lexbor.new(html)
9191
finder = Lexbor::CssFilter.new(".jo")
9292

9393
10.times do
@@ -99,7 +99,7 @@ describe Lexbor do
9999
it "should raise on empty selector" do
100100
html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
101101

102-
parser = Lexbor::Parser.new(html)
102+
parser = Lexbor.new(html)
103103
expect_raises(Lexbor::LibError, "Failed to css_selectors_parse for") do
104104
finder = Lexbor::CssFilter.new("")
105105
parser.css(finder).to_a.size.should eq 0
@@ -122,7 +122,7 @@ describe Lexbor do
122122
</div>
123123
PAGE
124124

125-
parser = Lexbor::Parser.new(html)
125+
parser = Lexbor.new(html)
126126

127127
# select all p nodes which id like `*p*`
128128
parser.css("p[id*=p]").map(&.attribute_by("id")).to_a.should eq ["p1", "p2", "p3", "p4", "p5", "p6"]
@@ -166,7 +166,7 @@ describe Lexbor do
166166
</body></html>
167167
PAGE
168168

169-
parser = Lexbor::Parser.new(html)
169+
parser = Lexbor.new(html)
170170
parser.css("#t2 tr td:first-child").map(&.inner_text).to_a.should eq ["123", "foo", "bar", "xyz"]
171171
parser.css("#t2 tr td:first-child").map(&.to_html).to_a.should eq ["<td>123</td>", "<td>foo</td>", "<td>bar</td>", "<td>xyz</td>"]
172172

@@ -179,7 +179,7 @@ describe Lexbor do
179179

180180
it "not sigfaulting on more than 1024 elements" do
181181
str = "<html>" + "<div class=A>ooo</div>" * 20000 + "</html>"
182-
parser = Lexbor::Parser.new(str)
182+
parser = Lexbor.new(str)
183183

184184
c = 0
185185
x = 0
@@ -192,12 +192,12 @@ describe Lexbor do
192192
end
193193

194194
it "bug in css" do
195-
parser = Lexbor::Parser.new(%q{<div class="asfjjjj">bla</div>})
195+
parser = Lexbor.new(%q{<div class="asfjjjj">bla</div>})
196196
parser.css("div.jjjj").to_a.size.should eq 0
197197
end
198198

199199
it "css with yield" do
200-
parser = Lexbor::Parser.new(%q{<div class="jjjj">bla</div>})
200+
parser = Lexbor.new(%q{<div class="jjjj">bla</div>})
201201
parser.css("div.jjjj") { |col| col.to_a.size }.should eq 1
202202
end
203203
end

spec/integration_spec.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def parser_links
6464
</html>
6565
HTML
6666

67-
Lexbor::Parser.new(str)
67+
Lexbor.new(str)
6868
end
6969

7070
describe "integration" do

spec/iterators_spec.cr

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def parser(**args)
2121
</html>
2222
HTML
2323

24-
parser = Lexbor::Parser.new(str, **args)
24+
parser = Lexbor.new(str, **args)
2525
parser
2626
end
2727

@@ -168,7 +168,7 @@ describe "iterators" do
168168
end
169169

170170
# it "Collection behaves like array, when multiple times call size, empty? and others..." do
171-
# parser = Lexbor::Parser.new(%q{<head><title>Title</title></head>})
171+
# parser = Lexbor.new(%q{<head><title>Title</title></head>})
172172
# iter = parser.css("title")
173173

174174
# iter.size.should eq 1

spec/lexbor_spec.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ require "./spec_helper"
22

33
describe Lexbor do
44
it "parser work" do
5-
parser = Lexbor::Parser.new("<html>BLA</html>")
5+
parser = Lexbor.new("<html>BLA</html>")
66

77
parser.root!.tag_name.should eq "html"
88
parser.root!.child!.tag_name.should eq "head"

0 commit comments

Comments
 (0)