git.netwichtig.de Git - user/henk/code/ruby/rbot.git/commitdiff
search: fix google calc scraping
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Mon, 10 Sep 2012 04:33:32 +0000 (06:33 +0200)
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Mon, 10 Sep 2012 04:33:32 +0000 (06:33 +0200)
data/rbot/plugins/search.rb

index 0a4397084db38b48a2be56d2057ced3a7d492f62..0e80a2d8486a639ee36400a076b1f096b431999b 100644 (file)
@@ -18,7 +18,7 @@
 GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q="
 GOOGLE_WAP_SEARCH = "http://www.google.com/m/search?hl=en&q="
 GOOGLE_WAP_LINK = /"r">(?:<div[^>]*>)?<a href="([^"]+)"[^>]*>(.*?)<\/a>/im
-GOOGLE_CALC_RESULT = %r{<img src=/images/calc_img\.gif(?: width=40 height=30 alt="")?>.*?<h[1-6] class=r[^>]*><b>(.+?)</b>}
+GOOGLE_CALC_RESULT = %r{<h[1-6] class="r" [^>]*>(.+?)</h}
 GOOGLE_COUNT_RESULT = %r{<font size=-1>Results <b>1<\/b> - <b>10<\/b> of about <b>(.*)<\/b> for}
 GOOGLE_DEF_RESULT = %r{onebox_result">\s*(.*?)\s*<br/>\s*(.*?)<table}
 GOOGLE_TIME_RESULT = %r{alt="Clock"></td><td valign=[^>]+>(.+?)<(br|/td)>}
@@ -202,7 +202,7 @@ class SearchPlugin < Plugin
     debug "#{html.size} bytes of html recieved"
     debug html
 
-    candidates = html.match(/font-weight:bold">(.*?)<\/(?:span|div)>/)
+    candidates = html.match(GOOGLE_CALC_RESULT)
     debug "candidates: #{candidates.inspect}"
 
     if candidates.nil?