本篇文章主要是记录一下使用CURL的坑
- 0x01 类的定义
- 0x02 主要请求方法的实现
- 0x03 POST与GET的区别
- 0x04 获取标签内容 - HTML解析
- 0x05 根据键名获取键值 - 搜集信息
- 0x06 源代码
- 0x07 参考
- 0x08 结语
0x01 类的定义
class http {
public:
http();
http(std::string url, unsigned int port = 80); // 设置请求地址以及端口
bool setURL(std::string url); // 设置请求地址
bool setPort(unsigned int port); // 设置端口
bool getRequest(); // get 方法
bool postRequest(std::string data = ""); // post 方法
bool setCookie(std::string cookie = "");
bool addRequestHeaders(std::string header); // 添加请求头
std::string getTitle(); // 获取标题
std::string getResponse(); // 获取响应内容
std::string getResponse(std::string Key); // 根据指定的键名获取键值 例子:getResponse("Server"); // BWS/1.1
~http(); // 释放资源
private:
unsigned int _responseCode; // 响应码
unsigned int _timeout = 2; // 超时时间
unsigned short int _connectTimeout = 1; // 连接超时时间
bool _request(); // 核心请求方法
CURL * curl = NULL; // curl指针
CURLcode _res; // 请求返回值
std::string _url; // 请求地址
unsigned int _port; // 请求端口
std::string _title; // 响应标题
std::string _responseHeader; // 响应头
std::string _responseBody; // 响应主体内容
bool isRequested; // 是否请求完毕
curl_slist * _requestHeaders=NULL; // 请求头列表 headers
curl_slist * _responseCookie=NULL; // 响应Cookie
static size_t _callBackWrite(char *ptr, size_t n, size_t m, std::string *data); // 回调函数,用于将响应内容放入内存操作
};
0x02 主要请求方法的实现
/**
* 发送请求
* @return
*/
bool http::_request() {
curl_easy_setopt(curl,CURLOPT_TIMEOUT,_timeout);
curl_easy_setopt(curl,CURLOPT_CONNECTTIMEOUT,_connectTimeout);
curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,false);
curl_easy_setopt(curl,CURLOPT_SSL_VERIFYHOST,false);
curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_callBackWrite);
curl_easy_setopt(curl,CURLOPT_WRITEHEADER,&_responseHeader);
curl_easy_setopt(curl,CURLOPT_WRITEDATA,&_responseBody);
if(_requestHeaders != NULL){
curl_easy_setopt(curl,CURLOPT_HTTPHEADER,_requestHeaders);
}
curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 1);
_res = curl_easy_perform(curl);
curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&_responseCode);
isRequested = true;
}
0x03 POST与GET的区别
/**
* post请求
* @param data
* @return
*/
bool http::postRequest(std::string data) {
curl_easy_setopt(curl,CURLOPT_POST,1);
curl_easy_setopt(curl,CURLOPT_POSTFIELDS,data.c_str());
_request();
}
/**
* get请求
* @return
*/
bool http::getRequest() {
_request();
}
POST比GET多两步操作,需要设置两个值
0x04 获取标签内容 - HTML解析
使用htmlcxx
库,安装命令:sudo apt-get install libhtmlcxx-dev
包含头文件:#include <htmlcxx/html/ParserDom.h>
/**
* 返回页面标题
* @return
*/
std::string http::getTitle() {
htmlcxx::HTML::ParserDom parserDom;
tree<htmlcxx::HTML::Node>dom = parserDom.parseTree(_responseBody);
tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
tree<htmlcxx::HTML::Node>::iterator end = dom.end();
for(; it != end; ++it)
{
if (it->tagName()=="title") // 如果标签名为title
{
it++;
_title.append(it->text()); // 返回 title 标签内容
}
}
return _title;
}
0x05 根据键名获取键值 - 搜集信息
主要是字符串的操作
/**
* 通过键名获取键值
* @param Key
* @return
*/
std::string http::getResponse(std::string Key) {
std::string _del = "\r\n";
std::string::size_type pos1, pos2;
std::string _lineStr;
pos2 = _responseHeader.find(_del);
pos1 = 0;
while(std::string::npos != pos2)
{
_lineStr = _responseHeader.substr(pos1, pos2-pos1);
if(_lineStr.find(Key) == 0){
return _lineStr.substr(Key.length()+2);
}
pos1 = pos2 + _del.size();
pos2 = _responseHeader.find(_del, pos1);
}
if(pos1 != _responseHeader.length()){
if(_lineStr.find(Key) == 0){
return _lineStr.substr(Key.length()+2);
}
}
return NULL;
}
0x06 源代码
http.h
//
// Created by payloads on 1/22/18.
//
#ifndef MYPROJECT_HTTP_H
#define MYPROJECT_HTTP_H
#include <iostream>
#include <string>
#include <vector>
#include <regex>
#include <curl/curl.h>
#include <htmlcxx/html/ParserDom.h>
#include <sstream>
class http {
public:
http();
http(std::string url, unsigned int port = 80); // 设置请求地址以及端口
bool setURL(std::string url); // 设置请求地址
bool setPort(unsigned int port); // 设置端口
bool getRequest(); // get 方法
bool postRequest(std::string data = ""); // post 方法
bool setCookie(std::string cookie = "");
bool addRequestHeaders(std::string header);
std::string getTitle(); // 获取标题
std::string getResponse(); // 获取响应内容
std::string getResponse(std::string Key);
~http();
private:
unsigned int _responseCode;
unsigned int _timeout = 2;
unsigned short int _connectTimeout = 1;
bool _request();
CURL * curl = NULL;
CURLcode _res;
std::string _url;
unsigned int _port;
std::string _title;
std::string _responseHeader;
std::string _responseBody;
bool isRequested;
curl_slist * _requestHeaders=NULL;
curl_slist * _responseCookie=NULL;
static size_t _callBackWrite(char *ptr, size_t n, size_t m, std::string *data);
};
#endif //MYPROJECT_HTTP_H
http.cpp
//
// Created by payloads on 1/22/18.
//
#include "http.h"
/**
* 初始化请求对象
*/
http::http() {
if(curl == NULL){
isRequested = false;
curl = curl_easy_init();
}
}
/**
* 回收资源
*/
http::~http() {
curl_slist_free_all(_requestHeaders);
curl_slist_free_all(_responseCookie);
curl_easy_cleanup(curl);
}
/**
* 设置请求地址和端口
* @param url
* @param port
*/
http::http(std::string url, unsigned int port) {
if(curl == NULL){
curl = curl_easy_init();
_url = url;
if(url.substr(0,5) == "https"){
_port = 443;
}else{
_port = port;
}
curl_easy_setopt(curl,CURLOPT_URL,_url.c_str());
curl_easy_setopt(curl,CURLOPT_PORT,_port);
}
}
/**
* 设置请求地址
* @param url
* @return
*/
bool http::setURL(std::string url) {
curl_easy_setopt(curl,CURLOPT_URL,url.c_str());
}
/**
* 设置端口
* @param port
* @return
*/
bool http::setPort(unsigned int port) {
curl_easy_setopt(curl,CURLOPT_PORT,port);
}
/**
* 发送请求
* @return
*/
bool http::_request() {
curl_easy_setopt(curl,CURLOPT_TIMEOUT,_timeout);
curl_easy_setopt(curl,CURLOPT_CONNECTTIMEOUT,_connectTimeout);
curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,false);
curl_easy_setopt(curl,CURLOPT_SSL_VERIFYHOST,false);
curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_callBackWrite);
curl_easy_setopt(curl,CURLOPT_WRITEHEADER,&_responseHeader);
curl_easy_setopt(curl,CURLOPT_WRITEDATA,&_responseBody);
if(_requestHeaders != NULL){
curl_easy_setopt(curl,CURLOPT_HTTPHEADER,_requestHeaders);
}
curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 1);
_res = curl_easy_perform(curl);
curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&_responseCode);
isRequested = true;
}
/**
* 回调函数
* @param ptr
* @param n
* @param m
* @param data
* @return
*/
size_t http::_callBackWrite(char *ptr, size_t n, size_t m, std::string *data) {
if (data == NULL) return 0;
data->append(ptr,n*m);
return n*m;
}
/**
* get请求
* @return
*/
bool http::getRequest() {
_request();
}
/**
* post请求
* @param data
* @return
*/
bool http::postRequest(std::string data) {
curl_easy_setopt(curl,CURLOPT_POST,1);
curl_easy_setopt(curl,CURLOPT_POSTFIELDS,data.c_str());
_request();
}
/**
* 设置Cookie
* @param cookie
* @return
*/
bool http::setCookie(std::string cookie) {
curl_easy_setopt(curl,CURLOPT_COOKIE,cookie.c_str());
}
/**
* 设置请求头
* @param header
* @return
*/
bool http::addRequestHeaders(std::string header) {
_requestHeaders = curl_slist_append(_requestHeaders,header.c_str());
}
/**
* 返回页面标题
* @return
*/
std::string http::getTitle() {
htmlcxx::HTML::ParserDom parserDom;
tree<htmlcxx::HTML::Node>dom = parserDom.parseTree(_responseBody);
tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
tree<htmlcxx::HTML::Node>::iterator end = dom.end();
for(; it != end; ++it)
{
if (it->tagName()=="title")
{
it++;
_title.append(it->text());
}
}
return _title;
}
/**
* 返回响应头
* @return
*/
std::string http::getResponse() {
return _responseHeader;
}
/**
* 通过键名获取键值
* @param Key
* @return
*/
std::string http::getResponse(std::string Key) {
std::string _del = "\r\n";
std::string::size_type pos1, pos2;
std::string _lineStr;
pos2 = _responseHeader.find(_del);
pos1 = 0;
while(std::string::npos != pos2)
{
_lineStr = _responseHeader.substr(pos1, pos2-pos1);
if(_lineStr.find(Key) == 0){
return _lineStr.substr(Key.length()+2);
}
pos1 = pos2 + _del.size();
pos2 = _responseHeader.find(_del, pos1);
}
if(pos1 != _responseHeader.length()){
if(_lineStr.find(Key) == 0){
return _lineStr.substr(Key.length()+2);
}
}
return NULL;
}
main.cpp
#include <iostream>
#include "http.h"
int main(int argc,char * argv[]) {
std::cout << "args count :" << argc << std::endl;
http client("https://www.baidu.com/");
if(argc >= 2){
client.setURL(argv[1]);
}
client.getRequest();
std::cout << client.getTitle()<<std::endl;
std::cout << client.getResponse("Set-Cookie")<<std::endl;
return 0;
}
0x07 参考
- C++ Html解析器-HtmlCxx用户手册和源代码解析 : https://www.cnblogs.com/zhanglanyun/archive/2011/10/21/2220647.html
- C++ 分割字符串两种方法:https://www.cnblogs.com/dfcao/p/cpp-FAQ-split.html
- libcurl - API:https://curl.haxx.se/libcurl/c/
0x08 结语
后面继续扩展代理功能、并不是很难
希望能够把轮子造好,为后面的项目开发做准备。