From 4e90cc6417c0922acf431a5412d58300733ae695 Mon Sep 17 00:00:00 2001 From: Rasel Date: Fri, 10 Dec 2021 19:39:44 +0600 Subject: [PATCH 1/6] Scraping tickets details and all flights information for between two dates, related code updated --- tour_site_scraper.rb | 126 ++++++++++++++++++++++++++++++------------- 1 file changed, 90 insertions(+), 36 deletions(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index 78a044b..fc12718 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -2,52 +2,106 @@ require 'selenium-webdriver' require 'pry' -# This options are for headless execution of the browser so that it don't need to load browser +MAX_RETRY = 100 +WAIT = Selenium::WebDriver::Wait.new(timeout: 20) + # options = Selenium::WebDriver::Firefox::Options.new(args: ['-headless']) # driver = Selenium::WebDriver.for(:firefox, options: options) - driver = Selenium::WebDriver.for :firefox + puts 'Trying to fetch data from site.....' puts '--------------------------------------------------------' -# Generate the search url physically using any date, time and put here, we will make it dynamic later based on requirement -driver.navigate.to 'https://www.tour.ne.jp/j_air/list/?adult=1&air_type=2&arr_out=ITM&change_date_in=0&change_date_out=0&date_out=20211231&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0' -sleep(1) # Wait 1s to load the page properly - -MAX_RETRY = 100 -begin - retries ||= 0 - ticket_summary_button = driver.find_element(:css, '#Act_Airline_Out') - return if ticket_summary_button.nil? - ticket_summary_button.click -rescue Exception => e - puts 'Trying to fetch data.. ' + retries.to_s - retries += 1 - sleep(1) # Wait 1s to load the page properly - retry if (retries <= MAX_RETRY) - raise "Could not get ticket website information: Please give necessary information to search" +def check_return_tickets_visibility(driver) + begin + # Wait for few seconds until able to find return tickets list + WAIT.until { driver.find_element(css: "#Act_response_in .company-list").displayed? } + rescue Exception + end end -# Take some time after click to load ajax content until search element can be found -loop do - sleep(1) - if !driver.find_elements(:class, 'airline-name').nil? - break +ticket_search_date_from = Date.new(2021, 12, 31) +ticket_search_date_to = Date.new(2022, 01, 31) +ticket_search_date_from.upto(ticket_search_date_to) do |dt| + departure_date_in = dt.to_s.delete("-") + departure_date_out = dt.to_s.delete("-") + + puts "\n\nTickets for this date " + dt.to_s + + # Generate the search url physically using any date, time and put here, we will make it dynamic later based on requirement + driver.navigate.to 'https://www.tour.ne.jp/j_air/list/?adult=1&arr_in=TYO&arr_out=CTS&change_date_in=0&change_date_out=0&date_in=' + departure_date_in + '&date_out=' + departure_date_out + '&dpt_in=CTS&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0' + sleep(1) # Wait 1s to load the page properly + begin + retries ||= 0 + ticket_summary_button_out = nil + ticket_summary_button_out = driver.find_element(:css, '#Act_Airline_Out') + ticket_summary_button_in = driver.find_element(:css, '#Act_Airline_In') + return if ticket_summary_button_out.nil? && ticket_summary_button_in.nil? + ticket_summary_button_out.click + ticket_summary_button_in.click + + WAIT.until { driver.find_element(css: "#Act_response_out .company-list").displayed? } + rescue Exception => e + puts 'Trying to fetch data.. ' + retries.to_s + retries += 1 + sleep(1) # Wait 1s to load the page properly + retry if (retries <= MAX_RETRY) + raise "Could not get ticket website information: Please give necessary information to search" end -end -# Find available information and available ticket list elements -ticket_summary = driver.find_elements(:class, 'airline-name') -ticket_available_lists = driver.find_elements(:class, 'toggle-btn-company') + check_return_tickets_visibility(driver) -# Parse elements to find each companies available ticket and sum to get total available tickets -total_available_ticket = 0 -!ticket_available_lists.nil? && ticket_available_lists.each do |ticket_count| - total_available_ticket += ticket_count.text.delete('^0-9').to_i -end + ticket_lists = driver.find_elements(:css, '#Act_response_in .company-list .company-box') + + ticket_lists.each do |ticket_list| + ticket_flight_lists = ticket_list.find_elements(:css, '.Act_flight_list') + # test = ticket_flight_lists.find_elements(:xpath, '//*[@id="Act_response_in"]/div/ul[contains(@class,"ticket-code")]') -puts 'Available ticket companies name = ' -!ticket_summary.nil? && ticket_summary.each do |ticket_cmpany| - puts ticket_cmpany.text.to_s + ', ' + ticket_flight_lists.each do |flight| + ticket_code = flight.find_elements(:css, '.ticket-summary-row > span')[1].attribute("innerHTML") + puts 'ticket codes are = ' + ticket_code + # flight.attribute("innerHTML") + # binding.pry + end + end + + +# binding.pry + + # Scrap Available Tickets Elements + ticket_summary = driver.find_elements(:css, '#Act_response_out .airline-name') + ticket_available_lists = driver.find_elements(:css, '#Act_response_out .toggle-btn-company') + + # Parse elements to find each companies available ticket and sum + total_available_ticket = 0 + !ticket_available_lists.nil? && ticket_available_lists.each do |ticket_count| + total_available_ticket += ticket_count.text.delete('^0-9').to_i + end + + # Scrap Returning Tickets Elements + ticket_summary_in = driver.find_elements(:css, '#Act_response_in .airline-name') + ticket_available_lists_in = driver.find_elements(:css, '#Act_response_in .toggle-btn-company') + + # Parse elements to find each companies returning tickets and sum + total_available_ticket_in = 0 + !ticket_available_lists_in.nil? && ticket_available_lists_in.each do |ticket_count_in| + total_available_ticket_in += ticket_count_in.text.delete('^0-9').to_i + end + + # Write all tickets search results + puts 'Total available ticket OUT found is = ' + total_available_ticket.to_s + puts 'Total available ticket IN found is = ' + total_available_ticket_in.to_s + + puts 'Available ticket IN companies name : ' + puts '------------------------------------' + !ticket_summary_in.nil? && ticket_summary_in.each do |ticket_cmpany_in| + puts ticket_cmpany_in.text.to_s + ', ' + end + + puts + puts 'Available ticket OUT companies name : ' + puts '-------------------------------------' + !ticket_summary.nil? && ticket_summary.each do |ticket_cmpany| + puts ticket_cmpany.text.to_s + ', ' + end end -puts 'Total available ticket found is = ' + total_available_ticket.to_s From 9168f831f6c0a04efde74ec4023ecc625ab622df Mon Sep 17 00:00:00 2001 From: Rasel Date: Mon, 13 Dec 2021 18:26:37 +0600 Subject: [PATCH 2/6] Ticket details scraping implemented for gettings all tickets information --- tour_site_scraper.rb | 99 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index fc12718..9fe73e8 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -20,6 +20,47 @@ def check_return_tickets_visibility(driver) end end +def scrap_ticket_details(driver, ticket_details_type) + if ticket_details_type == 'in' + ticket_lists = driver.find_elements(:css, '#Act_response_in .company-list .company-box') + else + ticket_lists = driver.find_elements(:css, '#Act_response_out .company-list .company-box') + end + + total_ticket_found = 0 + all_tickets_details_lists = [] + ticket_lists&.each do |ticket_list| + temp_ticket_company_info = {} + number_of_ticket_found = 0 + + ticket_company_name = ticket_list.find_element(:css, '.airline-name').text + number_of_ticket_found = ticket_list.find_element(:css, '.toggle-btn-company').text.delete('^0-9').to_i + total_ticket_found += number_of_ticket_found + ticket_minimum_price = ticket_list.find_element(:css, '.hdg-sup-price > b').text + + temp_ticket_company_info[:ticket_company_name] = ticket_company_name + temp_ticket_company_info[:ticket_minimum_price] = ticket_minimum_price + temp_ticket_company_info[:number_of_ticket_found] = number_of_ticket_found + + flight_lists = [] + ticket_company_lists = ticket_list.find_elements(:css, '.Act_flight_list') + ticket_company_lists&.each do |flight| + ticket_code = flight.find_elements(:css, '.ticket-summary-row > span')[1].attribute("innerHTML") + ticket_price = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-price > label > b')[0].attribute("innerHTML") + ticket_seat = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-seat')[0].attribute("innerHTML") + ticket_changable_status = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-date')[0].attribute("innerHTML") + ticket_type = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-type-text .ticket-detail-type-text-ellipsis')[0].attribute("innerHTML") + flight_data = {} + flight_data['flight_code'] = ticket_code + flight_data['flight_price'] = ticket_price + flight_lists.push(flight_data) + end + temp_ticket_company_info[:flight_lists] = flight_lists + all_tickets_details_lists.push(temp_ticket_company_info) + end + return all_tickets_details_lists, total_ticket_found +end + ticket_search_date_from = Date.new(2021, 12, 31) ticket_search_date_to = Date.new(2022, 01, 31) ticket_search_date_from.upto(ticket_search_date_to) do |dt| @@ -51,57 +92,15 @@ def check_return_tickets_visibility(driver) check_return_tickets_visibility(driver) - ticket_lists = driver.find_elements(:css, '#Act_response_in .company-list .company-box') - - ticket_lists.each do |ticket_list| - ticket_flight_lists = ticket_list.find_elements(:css, '.Act_flight_list') - # test = ticket_flight_lists.find_elements(:xpath, '//*[@id="Act_response_in"]/div/ul[contains(@class,"ticket-code")]') - - ticket_flight_lists.each do |flight| - ticket_code = flight.find_elements(:css, '.ticket-summary-row > span')[1].attribute("innerHTML") - puts 'ticket codes are = ' + ticket_code - # flight.attribute("innerHTML") - # binding.pry - end - end - + tickets_out_list = scrap_ticket_details(driver, 'out') + all_ticket_out_lists = tickets_out_list[0] + total_ticket_out_found = tickets_out_list[1] -# binding.pry + tickets_in_list = scrap_ticket_details(driver, 'in') + all_ticket_in_details = tickets_in_list[0] + total_ticket_in_found = tickets_in_list[1] - # Scrap Available Tickets Elements - ticket_summary = driver.find_elements(:css, '#Act_response_out .airline-name') - ticket_available_lists = driver.find_elements(:css, '#Act_response_out .toggle-btn-company') + puts "Total tickets found for out is = " + total_ticket_out_found.to_s + puts "Total tickets found for in is = " + total_ticket_in_found.to_s - # Parse elements to find each companies available ticket and sum - total_available_ticket = 0 - !ticket_available_lists.nil? && ticket_available_lists.each do |ticket_count| - total_available_ticket += ticket_count.text.delete('^0-9').to_i - end - - # Scrap Returning Tickets Elements - ticket_summary_in = driver.find_elements(:css, '#Act_response_in .airline-name') - ticket_available_lists_in = driver.find_elements(:css, '#Act_response_in .toggle-btn-company') - - # Parse elements to find each companies returning tickets and sum - total_available_ticket_in = 0 - !ticket_available_lists_in.nil? && ticket_available_lists_in.each do |ticket_count_in| - total_available_ticket_in += ticket_count_in.text.delete('^0-9').to_i - end - - # Write all tickets search results - puts 'Total available ticket OUT found is = ' + total_available_ticket.to_s - puts 'Total available ticket IN found is = ' + total_available_ticket_in.to_s - - puts 'Available ticket IN companies name : ' - puts '------------------------------------' - !ticket_summary_in.nil? && ticket_summary_in.each do |ticket_cmpany_in| - puts ticket_cmpany_in.text.to_s + ', ' - end - - puts - puts 'Available ticket OUT companies name : ' - puts '-------------------------------------' - !ticket_summary.nil? && ticket_summary.each do |ticket_cmpany| - puts ticket_cmpany.text.to_s + ', ' - end end From 67bb30eddcb1a9e38fb7fb0d4b8fb2ca46afd0a9 Mon Sep 17 00:00:00 2001 From: Rasel Date: Mon, 13 Dec 2021 23:19:09 +0600 Subject: [PATCH 3/6] Refactored code --- tour_site_scraper.rb | 66 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index 9fe73e8..a568463 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -4,27 +4,49 @@ MAX_RETRY = 100 WAIT = Selenium::WebDriver::Wait.new(timeout: 20) - +WEB_DRIVER = Selenium::WebDriver.for :firefox # options = Selenium::WebDriver::Firefox::Options.new(args: ['-headless']) # driver = Selenium::WebDriver.for(:firefox, options: options) -driver = Selenium::WebDriver.for :firefox + puts 'Trying to fetch data from site.....' puts '--------------------------------------------------------' -def check_return_tickets_visibility(driver) +def start_scraping(departure_date_in, departure_date_out) + # Generate the search url physically using any date, time and put here, we will make it dynamic later based on requirement + WEB_DRIVER.navigate.to 'https://www.tour.ne.jp/j_air/list/?adult=1&arr_in=TYO&arr_out=CTS&change_date_in=0&change_date_out=0&date_in=' + departure_date_in + '&date_out=' + departure_date_out + '&dpt_in=CTS&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0' + sleep(1) # Wait 1s to load the page properly + begin + retries ||= 0 + ticket_summary_button_out = nil + ticket_summary_button_out = WEB_DRIVER.find_element(:css, '#Act_Airline_Out') + ticket_summary_button_in = WEB_DRIVER.find_element(:css, '#Act_Airline_In') + return if ticket_summary_button_out.nil? && ticket_summary_button_in.nil? + ticket_summary_button_out.click + ticket_summary_button_in.click + rescue Exception => e + puts 'Trying to fetch data.. ' + retries.to_s + retries += 1 + sleep(1) # Wait 1s to load the page properly + retry if (retries <= MAX_RETRY) + raise "Could not get ticket website information: Please give necessary information to search" + end +end + +def check_return_tickets_visibility begin # Wait for few seconds until able to find return tickets list - WAIT.until { driver.find_element(css: "#Act_response_in .company-list").displayed? } + WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_out .company-list").displayed? } + WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_in .company-list").displayed? } rescue Exception end end -def scrap_ticket_details(driver, ticket_details_type) +def scrap_ticket_details(ticket_details_type) if ticket_details_type == 'in' - ticket_lists = driver.find_elements(:css, '#Act_response_in .company-list .company-box') + ticket_lists = WEB_DRIVER.find_elements(:css, '#Act_response_in .company-list .company-box') else - ticket_lists = driver.find_elements(:css, '#Act_response_out .company-list .company-box') + ticket_lists = WEB_DRIVER.find_elements(:css, '#Act_response_out .company-list .company-box') end total_ticket_found = 0 @@ -68,39 +90,17 @@ def scrap_ticket_details(driver, ticket_details_type) departure_date_out = dt.to_s.delete("-") puts "\n\nTickets for this date " + dt.to_s + start_scraping(departure_date_in, departure_date_out) + check_return_tickets_visibility - # Generate the search url physically using any date, time and put here, we will make it dynamic later based on requirement - driver.navigate.to 'https://www.tour.ne.jp/j_air/list/?adult=1&arr_in=TYO&arr_out=CTS&change_date_in=0&change_date_out=0&date_in=' + departure_date_in + '&date_out=' + departure_date_out + '&dpt_in=CTS&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0' - sleep(1) # Wait 1s to load the page properly - begin - retries ||= 0 - ticket_summary_button_out = nil - ticket_summary_button_out = driver.find_element(:css, '#Act_Airline_Out') - ticket_summary_button_in = driver.find_element(:css, '#Act_Airline_In') - return if ticket_summary_button_out.nil? && ticket_summary_button_in.nil? - ticket_summary_button_out.click - ticket_summary_button_in.click - - WAIT.until { driver.find_element(css: "#Act_response_out .company-list").displayed? } - rescue Exception => e - puts 'Trying to fetch data.. ' + retries.to_s - retries += 1 - sleep(1) # Wait 1s to load the page properly - retry if (retries <= MAX_RETRY) - raise "Could not get ticket website information: Please give necessary information to search" - end - - check_return_tickets_visibility(driver) - - tickets_out_list = scrap_ticket_details(driver, 'out') + tickets_out_list = scrap_ticket_details('out') all_ticket_out_lists = tickets_out_list[0] total_ticket_out_found = tickets_out_list[1] - tickets_in_list = scrap_ticket_details(driver, 'in') + tickets_in_list = scrap_ticket_details('in') all_ticket_in_details = tickets_in_list[0] total_ticket_in_found = tickets_in_list[1] puts "Total tickets found for out is = " + total_ticket_out_found.to_s puts "Total tickets found for in is = " + total_ticket_in_found.to_s - end From efeea1dc58f66f5ec227c34fdece6c3a32315872 Mon Sep 17 00:00:00 2001 From: Rasel Date: Tue, 14 Dec 2021 14:02:31 +0600 Subject: [PATCH 4/6] scraper implementation code refactored for avoiding ajax error issue --- tour_site_scraper.rb | 96 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index a568463..01a8e5f 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -2,13 +2,18 @@ require 'selenium-webdriver' require 'pry' -MAX_RETRY = 100 -WAIT = Selenium::WebDriver::Wait.new(timeout: 20) +MAX_RETRY = 40 # Maximum retry until the serarch page load in seconds +MAX_CALL = 3 # Maximum recall air ticket site if any ajax error or busy page shown +# Put Ticket Search input dates here +TICKET_SEARCH_FROM_DATE = Date.new(2021, 12, 31) +TICKET_SEARCH_TO_DATE = Date.new(2022, 01, 31) + +WAIT = Selenium::WebDriver::Wait.new(timeout: 20) # Maximum wait to find out search results html WEB_DRIVER = Selenium::WebDriver.for :firefox + # options = Selenium::WebDriver::Firefox::Options.new(args: ['-headless']) # driver = Selenium::WebDriver.for(:firefox, options: options) - puts 'Trying to fetch data from site.....' puts '--------------------------------------------------------' @@ -33,73 +38,70 @@ def start_scraping(departure_date_in, departure_date_out) end end -def check_return_tickets_visibility - begin - # Wait for few seconds until able to find return tickets list - WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_out .company-list").displayed? } - WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_in .company-list").displayed? } - rescue Exception - end -end - -def scrap_ticket_details(ticket_details_type) +def searching_ticket_type(ticket_details_type) if ticket_details_type == 'in' - ticket_lists = WEB_DRIVER.find_elements(:css, '#Act_response_in .company-list .company-box') + ticket_airlines = WEB_DRIVER.find_elements(:css, '#Act_response_in .company-list .company-box') else - ticket_lists = WEB_DRIVER.find_elements(:css, '#Act_response_out .company-list .company-box') + ticket_airlines = WEB_DRIVER.find_elements(:css, '#Act_response_out .company-list .company-box') end total_ticket_found = 0 all_tickets_details_lists = [] - ticket_lists&.each do |ticket_list| - temp_ticket_company_info = {} + ticket_airlines&.each do |ticket_airline| + temp_ticket_airline_info = {} number_of_ticket_found = 0 - ticket_company_name = ticket_list.find_element(:css, '.airline-name').text - number_of_ticket_found = ticket_list.find_element(:css, '.toggle-btn-company').text.delete('^0-9').to_i + ticket_company_name = ticket_airline.find_element(:css, '.airline-name').text + number_of_ticket_found = ticket_airline.find_element(:css, '.toggle-btn-company').text.delete('^0-9').to_i total_ticket_found += number_of_ticket_found - ticket_minimum_price = ticket_list.find_element(:css, '.hdg-sup-price > b').text + ticket_minimum_price = ticket_airline.find_element(:css, '.hdg-sup-price > b').text - temp_ticket_company_info[:ticket_company_name] = ticket_company_name - temp_ticket_company_info[:ticket_minimum_price] = ticket_minimum_price - temp_ticket_company_info[:number_of_ticket_found] = number_of_ticket_found + temp_ticket_airline_info[:ticket_company_name] = ticket_company_name + temp_ticket_airline_info[:ticket_minimum_price] = ticket_minimum_price + temp_ticket_airline_info[:number_of_ticket_found] = number_of_ticket_found - flight_lists = [] - ticket_company_lists = ticket_list.find_elements(:css, '.Act_flight_list') - ticket_company_lists&.each do |flight| - ticket_code = flight.find_elements(:css, '.ticket-summary-row > span')[1].attribute("innerHTML") - ticket_price = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-price > label > b')[0].attribute("innerHTML") - ticket_seat = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-seat')[0].attribute("innerHTML") - ticket_changable_status = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-date')[0].attribute("innerHTML") - ticket_type = flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-type-text .ticket-detail-type-text-ellipsis')[0].attribute("innerHTML") + ticket_flight_lists = [] + ticket_airline_flights_lists = ticket_airline.find_elements(:css, '.Act_flight_list') + ticket_airline_flights_lists&.each do |ticket_flight| flight_data = {} - flight_data['flight_code'] = ticket_code - flight_data['flight_price'] = ticket_price - flight_lists.push(flight_data) + flight_data['flight_code'] = ticket_flight.find_elements(:css, '.ticket-summary-row > span')[1].attribute("innerHTML") + flight_data['flight_price'] = ticket_flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-price > label > b')[0].attribute("innerHTML") + flight_data['flight_seat'] = ticket_flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-seat')[0].attribute("innerHTML") + flight_data['flight_changable_status'] = ticket_flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-icon .icon-date')[0].attribute("innerHTML") + flight_data['flight_type'] = ticket_flight.find_elements(:css, '.ticket-detail-item .ticket-detail-item-inner .ticket-detail-type .ticket-detail-type-text .ticket-detail-type-text-ellipsis')[0].attribute("innerHTML") + ticket_flight_lists.push(flight_data) end - temp_ticket_company_info[:flight_lists] = flight_lists - all_tickets_details_lists.push(temp_ticket_company_info) + temp_ticket_airline_info[:ticket_flight_lists] = ticket_flight_lists + all_tickets_details_lists.push(temp_ticket_airline_info) end return all_tickets_details_lists, total_ticket_found end -ticket_search_date_from = Date.new(2021, 12, 31) -ticket_search_date_to = Date.new(2022, 01, 31) -ticket_search_date_from.upto(ticket_search_date_to) do |dt| +TICKET_SEARCH_FROM_DATE.upto(TICKET_SEARCH_TO_DATE) do |dt| departure_date_in = dt.to_s.delete("-") departure_date_out = dt.to_s.delete("-") puts "\n\nTickets for this date " + dt.to_s - start_scraping(departure_date_in, departure_date_out) - check_return_tickets_visibility - tickets_out_list = scrap_ticket_details('out') - all_ticket_out_lists = tickets_out_list[0] - total_ticket_out_found = tickets_out_list[1] + begin + retries ||= 0 + start_scraping(departure_date_in, departure_date_out) + # Wait for few seconds until able to find return tickets list + WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_out .company-list").displayed? } + WAIT.until { WEB_DRIVER.find_element(css: "#Act_response_in .company-list").displayed? } + tickets_out_lists = searching_ticket_type('out') + tickets_in_lists = searching_ticket_type('in') + rescue Exception + puts "retris in check visibility=====" + retries.to_s + retries += 1 + retry if (retries <= MAX_CALL) + raise "Could not get ticket website information: Please give necessary information to search" + end - tickets_in_list = scrap_ticket_details('in') - all_ticket_in_details = tickets_in_list[0] - total_ticket_in_found = tickets_in_list[1] + all_ticket_out_lists = tickets_out_lists[0] + total_ticket_out_found = tickets_out_lists[1] + all_ticket_in_details = tickets_in_lists[0] + total_ticket_in_found = tickets_in_lists[1] puts "Total tickets found for out is = " + total_ticket_out_found.to_s puts "Total tickets found for in is = " + total_ticket_in_found.to_s From 27998355810bde1a99d9c103fddf20da20e254a0 Mon Sep 17 00:00:00 2001 From: Rasel Date: Tue, 14 Dec 2021 15:28:42 +0600 Subject: [PATCH 5/6] scraping site url line break added --- tour_site_scraper.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index 01a8e5f..7f27c72 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -19,8 +19,10 @@ def start_scraping(departure_date_in, departure_date_out) # Generate the search url physically using any date, time and put here, we will make it dynamic later based on requirement - WEB_DRIVER.navigate.to 'https://www.tour.ne.jp/j_air/list/?adult=1&arr_in=TYO&arr_out=CTS&change_date_in=0&change_date_out=0&date_in=' + departure_date_in + '&date_out=' + departure_date_out + '&dpt_in=CTS&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0' - sleep(1) # Wait 1s to load the page properly + WEB_DRIVER.navigate.to "https://www.tour.ne.jp/j_air/list/?adult=1&arr_in=TYO&arr_out=CTS&change_date_in=0&change_date_out=0&" + + "date_in=#{departure_date_in}&date_out=#{departure_date_out}&dpt_in=" + + "CTS&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0" + sleep(1) begin retries ||= 0 ticket_summary_button_out = nil From 332c148a54f8b56de10d8f5edb8a541a092f83b4 Mon Sep 17 00:00:00 2001 From: Rasel Date: Tue, 14 Dec 2021 15:30:10 +0600 Subject: [PATCH 6/6] Removed extra line --- tour_site_scraper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tour_site_scraper.rb b/tour_site_scraper.rb index 7f27c72..bf60ecc 100644 --- a/tour_site_scraper.rb +++ b/tour_site_scraper.rb @@ -4,6 +4,7 @@ MAX_RETRY = 40 # Maximum retry until the serarch page load in seconds MAX_CALL = 3 # Maximum recall air ticket site if any ajax error or busy page shown + # Put Ticket Search input dates here TICKET_SEARCH_FROM_DATE = Date.new(2021, 12, 31) TICKET_SEARCH_TO_DATE = Date.new(2022, 01, 31) @@ -94,7 +95,6 @@ def searching_ticket_type(ticket_details_type) tickets_out_lists = searching_ticket_type('out') tickets_in_lists = searching_ticket_type('in') rescue Exception - puts "retris in check visibility=====" + retries.to_s retries += 1 retry if (retries <= MAX_CALL) raise "Could not get ticket website information: Please give necessary information to search"