-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebCrawler.cpp
More file actions
101 lines (86 loc) · 3.28 KB
/
WebCrawler.cpp
File metadata and controls
101 lines (86 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
//============================================================================
// Name : WebCrawler.cpp
// Author : Friski
// Version : 0.1
// Copyright : FriskiPr's Copyright 2016
// Description : Sample of web crawler using boost.asio
//============================================================================
#include <iostream>
#include <fstream>
#include <string>
#include <boost/asio.hpp>
using namespace std;
using namespace boost::asio;
using boost::asio::ip::tcp;
int main(int argc, char* argv[])
{
string serverName = "www.mitrais.com";
string pathName = "/en/about";
try
{
boost::asio::io_service io_service;
// Get a list of endpoints corresponding to the server name.
tcp::resolver resolver(io_service);
tcp::resolver::query query(serverName, "http");
tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
// Try each endpoint until we successfully establish a connection.
tcp::socket socket(io_service);
boost::asio::connect(socket, endpoint_iterator);
// Form the request. We specify the "Connection: close" header so that the
// server will close the socket after transmitting the response. This will
// allow us to treat all data up until the EOF as the content.
boost::asio::streambuf request;
std::ostream request_stream(&request);
request_stream << "GET " << pathName << " HTTP/1.0\r\n";
request_stream << "Host: " << serverName << "\r\n";
request_stream << "Accept: */*\r\n";
request_stream << "Connection: close\r\n\r\n";
// Send the request.
boost::asio::write(socket, request);
// Read the response status line. The response streambuf will automatically
// grow to accommodate the entire line. The growth may be limited by passing
// a maximum size to the streambuf constructor.
boost::asio::streambuf response;
boost::asio::read_until(socket, response, "\r\n");
// Check that response is OK.
std::istream response_stream(&response);
std::string http_version;
response_stream >> http_version;
unsigned int status_code;
response_stream >> status_code;
std::string status_message;
std::getline(response_stream, status_message);
if (!response_stream || http_version.substr(0, 5) != "HTTP/")
{
std::cout << "Invalid response\n";
return 1;
}
if (status_code != 200)
{
std::cout << "Response returned with status code " << status_code << "\n";
return 1;
}
// Read the response headers, which are terminated by a blank line.
boost::asio::read_until(socket, response, "\r\n\r\n");
// Process the response headers.
std::string header;
while (std::getline(response_stream, header) && header != "\r")
std::cout << header << "\n";
std::cout << "\n";
// Write whatever content we already have to output.
if (response.size() > 0)
std::cout << &response;
// Read until EOF, writing data to output as we go.
boost::system::error_code error;
while (boost::asio::read(socket, response,
boost::asio::transfer_at_least(1), error))
std::cout << &response;
if (error != boost::asio::error::eof)
throw boost::system::system_error(error);
}
catch (std::exception& e)
{
std::cout << "Exception: " << e.what() << "\n";
}
return 0;
}