socket抓取网页

时间:2023-03-09 06:16:53
socket抓取网页
#include <iostream>
#include <string>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
using namespace std;

// Sends exactly `len` bytes from `data` on `sockfd`, retrying after short
// sends. Returns true once everything was sent, false if send() fails.
static bool send_all(int sockfd, const char *data, size_t len)
{
    size_t sent = 0;
    while (sent < len)
    {
        ssize_t n = send(sockfd, data + sent, len - sent, 0);
        if (n <= 0)
        {
            // -1 is an error; 0 would mean no progress, so stop instead of spinning.
            return false;
        }
        sent += static_cast<size_t>(n);
    }
    return true;
}

// Writes exactly `len` bytes from `data` to `fd`, retrying after short
// writes. Returns true once everything was written, false if write() fails.
static bool write_all(int fd, const char *data, size_t len)
{
    size_t written = 0;
    while (written < len)
    {
        ssize_t n = write(fd, data + written, len - written);
        if (n <= 0)
        {
            return false;
        }
        written += static_cast<size_t>(n);
    }
    return true;
}

// Reads a host name from standard input, resolves it, connects to TCP port 80,
// sends a "GET /" request and stores the raw response (status line, headers
// and body, exactly as received) in "recv.html" while echoing it to stdout.
//
// Any failure prints a diagnostic and terminates the process with exit(-1).
// When the server closes the connection, both descriptors are closed and the
// function returns normally.
void func()
{
    string url;
    cout << "输入网址:" << endl;
    if (!(cin >> url))
    {
        cerr << "input error" << endl;
        exit(-1);
    }

    // Resolve the host name to an IP address.
    struct hostent *website_host = gethostbyname(url.c_str());
    if (website_host == NULL)
    {
        // gethostbyname() reports failures through h_errno, not errno,
        // so herror() is used instead of perror().
        herror("gethostbyname error");
        exit(-1);
    }
    cout << "主机名称:";
    cout << website_host->h_name << endl;
    cout << "地址类型:";
    cout << website_host->h_addrtype << endl;
    cout << "地址长度:";
    cout << website_host->h_length << endl;

    // The socket below is AF_INET, so only an IPv4 result can be used.
    if (website_host->h_addrtype != AF_INET ||
        website_host->h_length != static_cast<int>(sizeof(struct in_addr)) ||
        website_host->h_addr_list[0] == NULL)
    {
        cerr << "unsupported address type" << endl;
        exit(-1);
    }

    // Create the socket descriptor.
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(-1);
    }
    cout << "建立socket完毕" << endl;

    // Fill in the destination address structure.
    struct sockaddr_in website_addr;
    memset(&website_addr, 0, sizeof(website_addr));
    website_addr.sin_family = AF_INET;
    website_addr.sin_port = htons(80);
    // Copy the bytes instead of dereferencing a cast pointer: the resolver's
    // char buffer carries no alignment guarantee for struct in_addr.
    memcpy(&website_addr.sin_addr, website_host->h_addr_list[0],
           sizeof(website_addr.sin_addr));
    cout << "地址初始化完毕" << endl;

    // Connect to the server.
    if (connect(sockfd, (struct sockaddr *)&website_addr, sizeof(website_addr)) == -1)
    {
        perror("connect error");
        exit(-1);
    }
    cout << "连接完毕" << endl;

    // Build the HTTP request in a std::string so a long host name cannot
    // overflow a fixed buffer. "Connection: close" asks the server to close
    // the socket after the response, which is what ends the receive loop.
    // The header block is terminated by exactly one empty line.
    string request = "GET / HTTP/1.1\r\n";
    request += "Host: " + url + "\r\n";
    request += "Accept: */*\r\n";
    request += "User-Agent: Mozilla/4.0(compatible)\r\n";
    request += "Connection: close\r\n";
    request += "\r\n";
    cout << "请求头构造完毕" << endl;
    cout << request << endl;

    if (!send_all(sockfd, request.data(), request.size()))
    {
        perror("send error");
        exit(-1);
    }
    cout << "发送完毕" << endl;
    cout << "send:\n" << request.size() << endl;

    // Open the output file; create it when missing and discard old content
    // so leftovers from a previous, longer response cannot survive.
    int fd = open("recv.html", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1)
    {
        perror("open error");
        exit(-1);
    }

    // Receive until the peer closes the connection.
    char buf[10 * 1024];
    while (true)
    {
        ssize_t received = recv(sockfd, buf, sizeof(buf), 0);
        if (received == 0)
        {
            // Orderly shutdown by the peer: the response is complete.
            cout << "对端关闭" << endl;
            break;
        }
        if (received == -1)
        {
            perror("read error");
            exit(-1);
        }

        const size_t chunk = static_cast<size_t>(received);
        cout << "recv:" << received << endl;
        // Use the byte count rather than NUL termination: the buffer may be
        // completely full and the payload may contain NUL bytes.
        cout.write(buf, static_cast<streamsize>(chunk));
        cout << endl;
        if (!write_all(fd, buf, chunk))
        {
            perror("write error");
            exit(-1);
        }
    }

    close(fd);
    close(sockfd);
}

// Program entry point: runs a single fetch.
int main()
{
    func();
    return 0;
}

socket抓取网页

socket抓取网页

socket抓取网页

版权声明:本文为博主原创文章,未经同意,不得转载。