Hello,
I'm writing a C++ program that downloads the source code of a web page. I'm using Winsock. Here is my source code:
/**
 * Downloads a web page over plain HTTP with Winsock and returns the raw
 * response text (status line and headers followed by the body).
 *
 * The request is sent as HTTP/1.1, so the server may answer with
 * "Transfer-Encoding: chunked"; in that case the chunk-size lines are part of
 * the returned text and are not decoded here.
 *
 * @param url  Currently ignored: the host, IP address and path are hard-coded
 *             in the request below.
 * @return     Every byte received until the server closed the connection.
 *             Empty if Winsock could not be started, the connection failed or
 *             the request could not be sent; possibly truncated if a receive
 *             error occurred part-way through.
 */
string get_source(string url)
{
    (void)url; // not used yet, see the note in the header comment

    string request = "GET /Simpsons/ HTTP/1.1\r\n";
    request += "Host: epguides.com\r\n";
    request += "Connection: close\r\n";
    request += "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n";
    request += "Accept-Language: fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3\r\n";
    request += "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n";
    request += "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3\r\n";
    request += "Referer: http://pozzyx.net/\r\n";
    request += "\r\n";

    string source;

    WSADATA wsa_data;
    if (WSAStartup(MAKEWORD(2,0), &wsa_data) != 0)
    {
        return source; // Winsock unavailable: nothing to clean up
    }

    SOCKET sock = socket(AF_INET, SOCK_STREAM, 0);

    SOCKADDR_IN sin = {}; // zero every field, including the padding bytes
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = inet_addr("216.239.136.165"); // epguides.com
    sin.sin_port = htons(80); // HTTP port

    // connect() also fails when socket() did not return a usable handle, so
    // this single check covers both calls.
    if (connect(sock, reinterpret_cast<SOCKADDR *>(&sin), sizeof(sin)) == 0)
    {
        // send() may accept only part of the data, so loop until the whole
        // request has been handed over.
        const char *pending = request.data();
        size_t remaining = request.size();
        bool request_sent = true;
        while (remaining > 0)
        {
            const int sent = send(sock, pending, static_cast<int>(remaining), 0);
            if (sent <= 0)
            {
                request_sent = false;
                break;
            }
            pending += sent;
            remaining -= static_cast<size_t>(sent);
        }

        if (request_sent)
        {
            char buffer[1024];
            int received = 0;
            // recv() returns the number of bytes written into buffer, 0 once
            // the peer has closed the connection, and a negative value on
            // error. The buffer is NOT NUL-terminated, so append exactly
            // `received` bytes instead of treating it as a C string.
            while ((received = recv(sock, buffer, static_cast<int>(sizeof(buffer)), 0)) > 0)
            {
                source.append(buffer, static_cast<size_t>(received));
            }
        }
    }

    closesocket(sock); // close the socket
    WSACleanup();
    return source;
}
If I try to download http://epguides.com/Simpsons/, there is no problem. I get:
HTTP/1.1 200 OK
Transfer-Encoding: chunked
Connection: close
Date: Sat, 26 May 2007 16:27:56 GMT
Server: Microsoft-IIS/6.0
--------------: -----
Content-Type: text/html
106E
<html>
<head>
<title>The Simpsons (a Titles & Air Dates Guide)</title>
But when I try with epguides.com/Smallville, I don't get the correct source :
HTTP/1.1 200 OK
Transfer-Encoding: chunked
Connection: close
Date: Sat, 26 May 2007 16:30:08 GMT
Server: Microsoft-IIS/6.0
--------------: -----
Content-Type: text/html
1023
<td><a target="_blank" href="../search/">SEARCH<br />epguides<br />& TV.com</a></td
<td><a href="../FAQ/">FAQ</a></td>
...
Can anyone help me, please?
Sorry for my English, but the French communities couldn't find my problem :D
Thanks