forked from stormy/Quora-Personal-Analytics
-
Notifications
You must be signed in to change notification settings - Fork 1
/
QuoraCrawler.rb
191 lines (150 loc) · 4.4 KB
/
QuoraCrawler.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
require 'rubygems'
require 'bundler/setup'
require 'watir'
require 'optparse'
# Work from the directory that contains this script so the relative output
# paths used by the crawl methods ("<name>/...") are created next to it.
directory = File.dirname(__FILE__)
Dir.chdir(directory) unless Dir.pwd == directory

# Each flag given on the command line adds one key to +options+.
# An empty hash (no flags) means "crawl every section".
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: example.rb [options] 'first-last'. No options to crawl all."
  opts.on("-a", "--answers", "Crawl Answers only") { |v| options[:answers] = v }
  opts.on("-f", "--followers", "Crawl followers only") { |v| options[:followers] = v }
  opts.on("-n", "--following", "Crawl following only") { |v| options[:following] = v }
  opts.on("-q", "--questions", "Crawl questions only") { |v| options[:questions] = v }
  opts.on("-p", "--posts", "Crawl posts only") { |v| options[:posts] = v }
  opts.on("-t", "--topics", "Crawl topics only") { |v| options[:topics] = v }
  opts.on("-m", "--mentions", "Crawl mentions only") { |v| options[:mentions] = v }
  opts.on("-r", "--profile", "Crawl profile only") { |v| options[:profile] = v }
  opts.on("-y", "--followingquestions", "Crawl ONLY the questions your logged in account is following") do |v|
    options[:following_questions] = v
  end
end

# Report bad flags with the usage text instead of an uncaught exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort("#{e.message}\n\n#{parser.help}")
end

# The first remaining argument is the profile name; it is used both in the
# URLs that get crawled and as the name of the output directory.
# Validate it before a browser window is opened so a missing argument exits
# cleanly rather than raising TypeError from File.exist?(nil).
name = ARGV[0]
abort(parser.help) if name.nil? || name.strip.empty?

Dir.mkdir(name) unless File.exist?(name)

Watir::Browser.default = 'firefox'
b = Watir::Browser.new
# Clicks the "pager_next" button repeatedly until the page contains a
# pager element whose style is "display: none;".
#
# b - browser object; must respond to cell(:xpath, query), returning an
#     element that responds to exists? and click.
#
# Does nothing when the page has no pager button at all.
def click_more(b)
  pager_xpath = "//div[@class='pager_next action_button']"
  hidden_pager_xpath = "//div[@class='pager_next action_button' and @style='display: none;']"

  return unless b.cell(:xpath, pager_xpath).exists?

  # Elements are looked up again on every pass rather than held in a local.
  until b.cell(:xpath, hidden_pager_xpath).exists?
    b.cell(:xpath, pager_xpath).click
  end
end
# Clicks the "more_link" anchor inside "answer_voters" spans for as long as
# such a link can still be found on the page.
#
# b - browser object; must respond to cell(:xpath, query), returning an
#     element that responds to exists? and click.
def expand_more(b)
  more_link_xpath = "//span[@class='answer_voters']//a[@class='more_link']"
  # Fresh lookup on every call; the element is never cached between checks.
  find_more_link = lambda { b.cell(:xpath, more_link_xpath) }

  return unless find_more_link.call.exists?

  find_more_link.call.click while find_more_link.call.exists?
end
# Opens the "following" page for +name+, pages through it with click_more,
# and writes the resulting HTML to "<name>/following.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_following(b, name)
  b.goto("http://www.quora.com/#{name}/following")
  click_more(b)

  File.open("#{name}/following.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "followers" page for +name+, pages through it with click_more,
# and writes the resulting HTML to "<name>/followers.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_followers(b, name)
  followers_url = "http://www.quora.com/#{name}/followers"
  b.goto(followers_url)
  click_more(b)

  File.open("#{name}/followers.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "answers" page for +name+, pages through it with click_more,
# expands voter lists with expand_more, and writes the resulting HTML to
# "<name>/answers.html".
#
# b    - browser object (goto, html, plus whatever the helpers need).
# name - profile name; used in the URL and as the output directory.
def get_answers(b, name)
  b.goto("http://www.quora.com/#{name}/answers")
  click_more(b)
  expand_more(b)

  # Fixed pause before the snapshot; presumably lets the expanded content
  # finish loading -- TODO confirm.
  sleep(3)

  File.open("#{name}/answers.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "questions" page for +name+, pages through it with click_more,
# clicks every "view_all_topics" link until none can be found, and writes
# the resulting HTML to "<name>/questions.html".
#
# b    - browser object (goto, html, cell, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_questions(b, name)
  b.goto("http://www.quora.com/#{name}/questions")
  click_more(b)

  topics_link_xpath = "//a[@class='view_all_topics']"
  # The link is looked up again before every click.
  b.cell(:xpath, topics_link_xpath).click while b.cell(:xpath, topics_link_xpath).exists?

  File.open("#{name}/questions.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "posts" page for +name+, pages through it with click_more, and
# writes the resulting HTML to "<name>/posts.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_posts(b, name)
  posts_url = "http://www.quora.com/#{name}/posts"
  b.goto(posts_url)
  click_more(b)
  # Disabled step, kept for reference:
  # expand_more(b)

  File.open("#{name}/posts.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "mentions" page for +name+, pages through it with click_more,
# and writes the resulting HTML to "<name>/mentions.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_mentions(b, name)
  b.goto("http://www.quora.com/#{name}/mentions")
  click_more(b)

  output_path = "#{name}/mentions.html"
  File.open(output_path, 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "topics" page for +name+, pages through it with click_more, and
# writes the resulting HTML to "<name>/topics.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - profile name; used in the URL and as the output directory.
def get_topics(b, name)
  topics_url = "http://www.quora.com/#{name}/topics"
  b.goto(topics_url)
  click_more(b)

  # Disabled "view all topics" expansion, kept for reference. Note that the
  # loop body below never calls .click on the element it looks up.
  # while b.cell(:xpath, "//a[@class='view_all_topics']").exists? and !b.cell(:xpath, "//a[@class='view_all_topics hidden']").exists?
  # b.cell(:xpath, "//a[@class='view_all_topics']")
  # end

  File.open("#{name}/topics.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the profile page for +name+ and writes its HTML to
# "<name>/about.html". No paging or expansion is performed.
#
# b    - browser object; must respond to goto and html.
# name - profile name; used in the URL and as the output directory.
def get_profile(b, name)
  profile_url = "http://www.quora.com/#{name}/"
  b.goto(profile_url)

  File.open("#{name}/about.html", 'w') do |out|
    out.write(b.html)
  end
end
# Opens the "home/following" page (the URL does not include +name+), pages
# through it with click_more, and writes the resulting HTML to
# "<name>/following_questions.html".
#
# b    - browser object (goto, html, plus whatever click_more needs).
# name - used only as the output directory.
def get_following_questions(b, name)
  b.goto("http://www.quora.com/home/following")
  click_more(b)

  output_path = "#{name}/following_questions.html"
  File.open(output_path, 'w') do |out|
    out.write(b.html)
  end
end
# Maps each section key (as stored in +options+) to the crawl it triggers.
crawlers = {
  :followers           => lambda { get_followers(b, name) },
  :following           => lambda { get_following(b, name) },
  :answers             => lambda { get_answers(b, name) },
  :questions           => lambda { get_questions(b, name) },
  :posts               => lambda { get_posts(b, name) },
  :topics              => lambda { get_topics(b, name) },
  :mentions            => lambda { get_mentions(b, name) },
  :profile             => lambda { get_profile(b, name) },
  :following_questions => lambda { get_following_questions(b, name) }
}

if options.empty?
  # No flags: crawl every section except :following_questions, in this order.
  crawl_all_order = [:followers, :following, :answers, :questions,
                     :posts, :mentions, :profile, :topics]
  crawl_all_order.each { |section| crawlers[section].call }
else
  # Flags given: crawl only the sections whose key is present, in this order.
  selective_order = [:followers, :following, :answers, :questions, :posts,
                     :topics, :mentions, :profile, :following_questions]
  selective_order.each do |section|
    crawlers[section].call if options.key?(section)
  end
end