php常用功能封装-http访问(多线程版)

  • A+
所属分类:阁主小札
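/*
@author: 东阁-晓影
@version: 4.0
@time: 2018/5/19
@require: PHP >= 5.6 with the cURL extension
@caption: Supports GET, POST, HEAD and other HTTP methods
@caption: Supports HTTPS (certificate verification is disabled by default; the traffic itself is still encrypted)
@caption: Manages cookies automatically, grouped by domain
@caption: Supports file upload and large-file download with automatic file naming
@caption: Supports concurrent requests (the "multi-threaded" mode, built on curl_multi)
@caption: Mimics a real browser (User-Agent, Referer and common request headers)
@caption: On Windows, requests to localhost / 127.0.0.1 may hang because of an Nginx issue (a nasty pitfall)
@licence: GPL v3
 */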
/**
 * Small cURL-based HTTP client.
 *
 * Features: GET / HEAD / POST / multipart upload, in-memory and streamed
 * downloads with automatic file naming, a very simple per-host cookie jar,
 * and concurrent requests through the curl_multi API.
 *
 * Cookie handling is intentionally minimal: cookies are keyed by the
 * "host[:port]" part of the URL, and the Path, Domain, Expires and Max-Age
 * attributes of Set-Cookie are ignored (cookies are never expired).
 */
class Http
{
    /** @var mixed Easy handle of the most recent single request; 0 before the first request. */
    public $ch;
    /** @var mixed curl_multi handle of the most recent concurrent batch; 0 when none exists. */
    public $mh;
    /** @var array Easy handles of the most recent concurrent batch, indexed like its URL list. */
    public $mh_conn;
    /** @var string User-Agent header value. */
    public $ua;
    /** @var int Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT). */
    public $requestTimeout;
    /** @var int|null Total transfer timeout in seconds (CURLOPT_TIMEOUT); null/0 means no limit is set. */
    public $downTimeout;
    /** @var string Referer header value. */
    public $referer;
    /** @var array Raw request header lines sent with every request. */
    public $header;
    /** @var bool When true the response body is not fetched (CURLOPT_NOBODY). */
    public $nobody;
    /** @var string Value for CURLOPT_ENCODING (accepted content encodings). */
    public $encode;
    /** @var bool Whether to verify the TLS peer certificate and host name. */
    public $sslVerify;
    /** @var bool Whether response headers are included in the raw output (CURLOPT_HEADER). */
    private $report;
    /** @var bool Whether redirects are followed automatically. */
    public $followLocation;

    /** @var array Cookie jar: host[:port] => list of array(name, value). */
    public $cookie;
    /** @var array Download log: requested URL => info array built by download()/downloadBig(). */
    public $downloadManager;

    /**
     * Pattern shared by URL validation and host extraction.
     * Group 1 captures "host[:port]"; an optional "user:pass@" prefix is skipped.
     *
     * @var string
     */
    private static $urlPattern = '/^https?:\/\/(?:[^\/?#@\s]*@)?([\w\-.]+(?::[0-9]+)?)/i';

    /**
     * Initialise the client with browser-like defaults.
     */
    public function __construct()
    {
        $this->ch             = 0;
        $this->mh             = 0;
        $this->mh_conn        = array();
        $this->ua             = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.4793.400 QQBrowser/10.0.745.400";
        $this->requestTimeout = 20;
        $this->downTimeout    = null;
        $this->referer        = "https://baidu.com";
        $this->header         = array(
            "Content-type: application/x-www-form-urlencoded",
            "Accept:*/*",
            // Only advertise encodings cURL can actually decode. The previous
            // list included "sdch" (not decodable) and charset names.
            "Accept-Encoding:gzip,deflate",
            "Accept-Language:zh-CN,zh;q=0.8",
        );
        $this->nobody          = false;
        $this->report          = true;
        $this->encode          = "gzip,deflate";
        $this->sslVerify       = false;
        $this->followLocation  = true;
        $this->cookie          = array();
        $this->downloadManager = array();
    }

    /**
     * Release every cURL handle still owned by this object.
     */
    public function __destruct()
    {
        if (!empty($this->ch)) {
            curl_close($this->ch);
        }
        $this->releaseMulti();
    }

    /**
     * Close the easy handles and the multi handle left over from the previous
     * concurrent batch. The handles are kept alive between batches so callers
     * can still pass them to curlInfo()/showErr().
     *
     * @return void
     */
    private function releaseMulti()
    {
        foreach ((array) $this->mh_conn as $old) {
            if (!empty($old)) {
                curl_close($old);
            }
        }
        $this->mh_conn = array();

        if (!empty($this->mh)) {
            curl_multi_close($this->mh);
        }
        $this->mh = 0;
    }

    /**
     * Build the cURL option array for one request from the current settings.
     *
     * @param string       $url    Target URL (already validated).
     * @param array|string $post   Request body. Arrays are url-encoded unless $upload is true;
     *                             strings are sent verbatim. An empty value means "no body"
     *                             and the request stays a GET.
     * @param bool         $upload True for multipart uploads: $post is handed to cURL untouched
     *                             (it may contain CURLFile objects) and Content-Type headers
     *                             are dropped so cURL can add its own boundary.
     * @return array Options suitable for curl_setopt_array().
     */
    private function getOptions($url, $post = "", $upload = false)
    {
        $options = array(CURLOPT_URL => $url);

        if ($post) {
            $options[CURLOPT_POST] = 1;
            // http_build_query() only accepts arrays/objects, so a body that is
            // already a string must be passed through as-is.
            $options[CURLOPT_POSTFIELDS] = ($upload || is_string($post)) ? $post : http_build_query($post);
        }

        $headers = (array) $this->header;
        if ($upload) {
            // Work on a copy: the shared header list is left untouched.
            $headers = array_filter($headers, static function ($line) {
                return stripos(ltrim((string) $line), 'content-type:') !== 0;
            });
        }
        $options[CURLOPT_HTTPHEADER] = array_values($headers);

        $options[CURLOPT_SSL_VERIFYPEER] = $this->sslVerify;
        // CURLOPT_SSL_VERIFYHOST takes 0 or 2; the value 1 (boolean true) is not valid.
        $options[CURLOPT_SSL_VERIFYHOST] = $this->sslVerify ? 2 : 0;
        $options[CURLOPT_USERAGENT]      = $this->ua;
        $options[CURLOPT_NOBODY]         = $this->nobody;
        $options[CURLOPT_ENCODING]       = $this->encode;
        $options[CURLOPT_REFERER]        = $this->referer;
        $options[CURLOPT_FOLLOWLOCATION] = $this->followLocation;
        $options[CURLOPT_RETURNTRANSFER] = 1;
        $options[CURLOPT_HEADER]         = $this->report;
        $options[CURLOPT_CONNECTTIMEOUT] = $this->requestTimeout;
        if ($this->downTimeout) {
            $options[CURLOPT_TIMEOUT] = $this->downTimeout;
        }

        $cookie = $this->cookie($url);
        if ($cookie) {
            $options[CURLOPT_COOKIE] = $cookie;
        }

        return $options;
    }

    /**
     * Split a raw response (headers followed by body) into its two parts.
     *
     * Only meaningful when the transfer was made with CURLOPT_HEADER enabled.
     * Both parts are empty strings when the handle reports an error, when no
     * handle exists, or when $str is not a string (e.g. curl_exec() returned false).
     *
     * @param string|mixed $str Raw response.
     * @param mixed        $ch  Easy handle that produced it; defaults to $this->ch.
     * @return array array("header" => string, "body" => string)
     */
    private function separate($str, $ch = 0)
    {
        if (!$ch) {
            $ch = $this->ch;
        }
        $parts = array("header" => '', "body" => '');
        if (!$ch || !is_string($str)) {
            return $parts;
        }

        $err = $this->showErr($ch);
        if ($err['code'] == 0) {
            $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
            // Cast: before PHP 8, substr() returns false when nothing follows the headers.
            $parts['header'] = (string) substr($str, 0, $headerSize);
            $parts['body']   = (string) substr($str, $headerSize);
        }

        return $parts;
    }

    /**
     * Validate one URL or a list of URLs.
     *
     * @param string|array $url A URL, or a flat list of URLs.
     * @return string|array The trimmed URL, or a re-indexed list of trimmed URLs.
     * @throws Exception When a URL does not start with http(s)://host.
     */
    private function url($url)
    {
        if (!is_array($url)) {
            return $this->checkUrl($url);
        }

        $clean = array();
        foreach ($url as $one) {
            $clean[] = $this->checkUrl($one);
        }
        return $clean;
    }

    /**
     * Trim and validate a single URL.
     *
     * @param string $url
     * @return string Trimmed URL.
     * @throws Exception When the URL is not an http(s) URL.
     */
    private function checkUrl($url)
    {
        $url = trim((string) $url);
        if (!preg_match(self::$urlPattern, $url)) {
            throw new Exception("url不合法:" . $url);
        }
        return $url;
    }

    /**
     * Extract the cookie-jar key ("host" or "host:port") from a URL.
     *
     * @param string $url
     * @return string
     * @throws Exception When no host can be recognised.
     */
    private function getDomain($url)
    {
        if (!preg_match(self::$urlPattern, (string) $url, $matched)) {
            throw new Exception("无法识别该URL所在域名:" . $url);
        }
        return $matched[1];
    }

    /**
     * Insert or replace one cookie in a jar (a list of array(name, value)).
     *
     * @param array  $jar
     * @param string $name
     * @param string $value
     * @return array The updated jar; an existing cookie of the same name is replaced in place.
     */
    private static function mergeCookie(array $jar, $name, $value)
    {
        foreach ($jar as $index => $pair) {
            if (is_array($pair) && isset($pair[0]) && (string) $pair[0] === (string) $name) {
                $jar[$index] = array($name, $value);
                return $jar;
            }
        }
        $jar[] = array($name, $value);
        return $jar;
    }

    /**
     * Render a jar as a Cookie request-header value ("a=1; b=2").
     *
     * Entries written by older versions of this class (value carrying a
     * trailing ";" or split on "=") are still rendered correctly.
     *
     * @param array $jar
     * @return string
     */
    private static function formatCookies(array $jar)
    {
        $pairs = array();
        foreach ($jar as $pair) {
            $text = rtrim(implode("=", (array) $pair), "; \t");
            if ($text !== '') {
                $pairs[] = $text;
            }
        }
        return implode("; ", $pairs);
    }

    /**
     * Parse Set-Cookie headers out of a response and optionally store them.
     *
     * @param string $url      URL the response came from (selects the jar).
     * @param string $str      Raw response (headers + body), or headers only when $isHeader is true.
     * @param bool   $isHeader True when $str already contains only the response headers.
     * @param mixed  $ch       Easy handle used to split $str; defaults to $this->ch.
     * @param bool   $isStore  True to merge the cookies into $this->cookie.
     * @return string "" when the response sets no cookie; otherwise the Cookie header value of
     *                the whole jar for this URL ($isStore true) or of just the cookies found in
     *                this response ($isStore false).
     * @throws Exception When the host cannot be extracted from $url.
     */
    private function getCookie($url, $str, $isHeader = false, $ch = 0, $isStore = true)
    {
        $domain = $this->getDomain($url);
        if (!$isHeader) {
            $parts = $this->separate($str, $ch);
            $str   = $parts['header'];
        }

        // One match per Set-Cookie line: name, then everything up to the first ";"
        // or the end of the line. Header names are case-insensitive.
        $pattern = '/^Set-Cookie:[ \t]*([^=;\s]+)=([^;\r\n]*)/mi';
        if (!is_string($str) || !preg_match_all($pattern, $str, $found, PREG_SET_ORDER)) {
            return "";
        }

        if ($isStore) {
            $jar = (isset($this->cookie[$domain]) && is_array($this->cookie[$domain]))
                ? $this->cookie[$domain]
                : array();
        } else {
            $jar = array();
        }

        foreach ($found as $hit) {
            $jar = self::mergeCookie($jar, $hit[1], trim($hit[2]));
        }

        if (!$isStore) {
            return self::formatCookies($jar);
        }

        $this->cookie[$domain] = $jar;
        return $this->cookie($url);
    }

    /**
     * Cookie request-header value for every cookie stored for the URL's host.
     *
     * @param string $url
     * @return string e.g. "a=1; b=2", or "" when nothing is stored.
     * @throws Exception When the host cannot be extracted from $url.
     */
    public function cookie($url)
    {
        $domain = $this->getDomain($url);
        if (empty($this->cookie[$domain]) || !is_array($this->cookie[$domain])) {
            return "";
        }
        return self::formatCookies($this->cookie[$domain]);
    }

    /**
     * curl_getinfo() for a handle.
     *
     * @param mixed $ch Easy handle; defaults to the handle of the last single request.
     * @return array Transfer information, or an empty array when no handle exists yet.
     */
    public function curlInfo($ch = 0)
    {
        if (!$ch) {
            $ch = $this->ch;
        }
        if (!$ch) {
            return array();
        }
        return curl_getinfo($ch);
    }

    /**
     * Last cURL error of a handle.
     *
     * @param mixed $ch Easy handle; defaults to the handle of the last single request.
     * @return array array("code" => int, "msg" => string); code 0 and an empty message
     *               when there was no error or no handle exists yet.
     */
    public function showErr($ch = 0)
    {
        if (!$ch) {
            $ch = $this->ch;
        }
        if (!$ch) {
            return array("code" => 0, "msg" => "");
        }
        return array(
            "code" => curl_errno($ch),
            "msg"  => curl_error($ch),
        );
    }

    /**
     * Value of the last occurrence of a response header.
     *
     * With redirects followed, $header holds one header block per hop, so the
     * last occurrence belongs to the final response.
     *
     * @param string $header Raw response headers.
     * @param string $name   Header name (case-insensitive).
     * @return string Trimmed value, or "" when the header is absent.
     */
    private static function lastHeaderValue($header, $name)
    {
        $pattern = '/^' . preg_quote($name, '/') . ':[ \t]*([^\r\n]*)/mi';
        if (!preg_match_all($pattern, (string) $header, $found)) {
            return '';
        }
        return trim(end($found[1]));
    }

    /**
     * Reduce a server- or URL-supplied name to a plain file name.
     *
     * Directory components are dropped (both "/" and "\" count as separators)
     * and control characters are removed, so the name cannot escape the
     * target directory.
     *
     * @param string $name
     * @return string Possibly empty.
     */
    private static function safeFileName($name)
    {
        $name = basename(str_replace("\\", "/", (string) $name));
        $name = preg_replace('/[\x00-\x1f]/', '', $name);
        return trim((string) $name);
    }

    /**
     * Choose a local file name for a download.
     *
     * Order of preference: the Content-Disposition file name, the last path
     * segment of the URL when it has an extension, an extension derived from
     * Content-Type. The name is always prefixed with a "YmdHis" timestamp.
     *
     * @param string $header Raw response headers.
     * @param string $url    Requested URL.
     * @return string File name without any directory part.
     */
    private function remoteFileName($header, $url = "")
    {
        $stamp  = date("YmdHis");
        $header = (string) $header;

        $disposition = self::lastHeaderValue($header, 'Content-Disposition');
        if ($disposition !== '') {
            $name = '';
            if (preg_match('/\bfilename\*\s*=\s*[^\']*\'[^\']*\'([^;]+)/i', $disposition, $m)) {
                // Extended form: filename*=charset'lang'percent-encoded-name
                $name = rawurldecode(trim($m[1]));
            } elseif (preg_match('/\bfilename\s*=\s*(?:"([^"]*)"|([^;]+))/i', $disposition, $m)) {
                $name = trim($m[1] !== '' ? $m[1] : (isset($m[2]) ? $m[2] : ''));
            }
            $name = self::safeFileName($name);
            if ($name !== '') {
                return $stamp . "-" . $name;
            }
        }

        if ($url) {
            // Only the path is considered, so a bare "https://host" never yields "host".
            $path = parse_url($url, PHP_URL_PATH);
            if (is_string($path)) {
                $name = self::safeFileName(rawurldecode($path));
                if ($name !== '' && preg_match('/\.[\w\-]+$/', $name)) {
                    return $stamp . "-" . $name;
                }
            }
        }

        $type = self::lastHeaderValue($header, 'Content-Type');
        if ($type !== '') {
            // Drop parameters such as "; charset=utf-8".
            $typeParts = explode(';', $type);
            $mime      = strtolower(trim($typeParts[0]));
            if ($mime === "text/plain") {
                return $stamp . ".txt";
            }
            $pieces = explode("/", $mime);
            if (!empty($pieces[1]) && preg_match('/^[\w\-.+]+$/', $pieces[1])) {
                return $stamp . "." . $pieces[1];
            }
        }

        return $stamp;
    }

    /**
     * Run one request on a fresh easy handle (stored in $this->ch).
     *
     * @param string $url        Validated URL.
     * @param array  $options    Options from getOptions(), possibly adjusted by the caller.
     * @param bool   $revCookie  Store the response cookies in the jar.
     * @param bool   $showHeader Keep the response headers in the returned string.
     * @return string|bool Response; with $showHeader true this is the raw curl_exec() result,
     *                     which is false when the transfer failed.
     * @throws Exception When no cURL handle can be created.
     */
    private function request($url, array $options, $revCookie, $showHeader)
    {
        $handle = curl_init();
        if (!$handle) {
            throw new Exception("curl初始化失败");
        }
        $this->ch = $handle;

        curl_setopt_array($this->ch, $options);
        $output = curl_exec($this->ch);

        if ($revCookie) {
            $this->getCookie($url, $output);
        }
        if (!$showHeader && $options[CURLOPT_HEADER]) {
            $parts  = $this->separate($output);
            $output = $parts['body'];
        }

        return $output;
    }

    /**
     * Send a GET request.
     *
     * @param string $url
     * @param bool   $revCookie  Store the response cookies in the jar.
     * @param bool   $showHeader Keep the response headers in the returned string.
     * @return string|bool Response body (with headers when $showHeader is true).
     * @throws Exception When the URL is invalid.
     */
    public function get($url, $revCookie = false, $showHeader = false)
    {
        $url = $this->url($url);
        return $this->request($url, $this->getOptions($url), $revCookie, $showHeader);
    }

    /**
     * Send a HEAD request.
     *
     * A HEAD response has no body, so with the default $showHeader = false the
     * result is an empty string; pass true to receive the response headers.
     *
     * @param string $url
     * @param bool   $revCookie  Store the response cookies in the jar.
     * @param bool   $showHeader Return the response headers.
     * @return string|bool
     * @throws Exception When the URL is invalid.
     */
    public function head($url, $revCookie = false, $showHeader = false)
    {
        $url                     = $this->url($url);
        $options                 = $this->getOptions($url);
        $options[CURLOPT_NOBODY] = true;
        return $this->request($url, $options, $revCookie, $showHeader);
    }

    /**
     * Send a url-encoded POST request.
     *
     * @param string       $url
     * @param array|string $post       Form fields (url-encoded automatically) or a ready-made
     *                                 body string. An empty value sends a plain GET.
     * @param bool         $revCookie  Store the response cookies in the jar.
     * @param bool         $showHeader Keep the response headers in the returned string.
     * @return string|bool
     * @throws Exception When the URL is invalid.
     */
    public function post($url, $post = "", $revCookie = true, $showHeader = false)
    {
        $url = $this->url($url);
        return $this->request($url, $this->getOptions($url, $post), $revCookie, $showHeader);
    }

    /**
     * Record a finished download in $this->downloadManager.
     *
     * @param string $url  Requested URL (key of the log entry).
     * @param string $name Local file name.
     * @return array The log entry: code, time, speed (KB/s), name, type, size (KB), num, realUrl.
     */
    private function recordDownload($url, $name)
    {
        $info     = $this->curlInfo();
        $downInfo = array(
            'code'    => $info['http_code'],
            'time'    => $info['total_time'],
            'speed'   => (float) $info['speed_download'] / 1024.0, // KB/s
            'name'    => $name,
            'type'    => $info['content_type'],
            'size'    => (float) $info['size_download'] / 1024.0, // KB
            'num'     => count($this->downloadManager),
            'realUrl' => $info['url'],
        );
        $this->downloadManager[$url] = $downInfo;
        return $downInfo;
    }

    /**
     * Download a file into the current directory, buffering it in memory.
     * Meant for small files; use downloadBig() for large ones.
     *
     * @param string $url
     * @param bool   $revCookie Store the response cookies in the jar.
     * @return array Download info, or an empty array when the response body is empty.
     * @throws Exception When the URL is invalid, the HTTP status is not 200,
     *                   or the file cannot be written.
     */
    public function download($url, $revCookie = false)
    {
        $url     = $this->url($url);
        $content = $this->get($url, $revCookie, true);

        $status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
        if ($status != 200) {
            throw new Exception("HTTP状态码异常:" . $status);
        }

        $parts = $this->separate($content);
        // Strict comparison: a body consisting of "0" is real content.
        if ($parts['body'] === '') {
            return array();
        }

        $name = $this->remoteFileName($parts['header'], $url);
        if (file_put_contents($name, $parts['body']) === false) {
            throw new Exception("写入文件失败:" . $name);
        }

        return $this->recordDownload($url, $name);
    }

    /**
     * Download a file into the current directory, streaming it straight to disk.
     *
     * A HEAD request is issued first to check the status and pick the file
     * name; the body is then fetched with a second request.
     *
     * @param string $url
     * @param bool   $revCookie Store the cookies of the HEAD response in the jar.
     * @return array Download info, or an empty array when the transfer failed
     *               (the incomplete file is removed in that case).
     * @throws Exception When the URL is invalid, the HTTP status of the HEAD request
     *                   is not 200, or the target file cannot be created.
     */
    public function downloadBig($url, $revCookie = false)
    {
        $url    = $this->url($url);
        $header = $this->head($url, $revCookie, true);

        $status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
        if ($status != 200) {
            throw new Exception("HTTP状态码异常:" . $status);
        }

        $name = $this->remoteFileName($header, $url);
        $fp   = fopen($name, "w");
        if ($fp === false) {
            throw new Exception("无法创建文件:" . $name);
        }
        flock($fp, LOCK_EX);

        $options                 = $this->getOptions($url);
        $options[CURLOPT_HEADER] = false; // keep response headers out of the file
        // Must be set after CURLOPT_RETURNTRANSFER so the callback receives the data.
        $options[CURLOPT_WRITEFUNCTION] = static function ($handle, $chunk) use ($fp) {
            // A short or failed write returns fewer bytes, which makes cURL abort.
            return (int) fwrite($fp, $chunk);
        };

        $this->ch = curl_init();
        curl_setopt_array($this->ch, $options);
        $ok = curl_exec($this->ch);

        flock($fp, LOCK_UN);
        fclose($fp);

        if (!$ok) {
            // Do not leave a truncated file that looks like a finished download.
            if (is_file($name)) {
                unlink($name);
            }
            return array();
        }

        return $this->recordDownload($url, $name);
    }

    /**
     * Send a multipart/form-data POST (file upload).
     *
     * @param string $url
     * @param array  $post       Fields passed to cURL unchanged; create file fields
     *                           with curl_file_create().
     * @param bool   $revCookie  Store the response cookies in the jar.
     * @param bool   $showHeader Keep the response headers in the returned string.
     * @return string|bool
     * @throws Exception When the URL is invalid.
     */
    public function upload($url, $post = "", $revCookie = true, $showHeader = false)
    {
        $url = $this->url($url);
        return $this->request($url, $this->getOptions($url, $post, true), $revCookie, $showHeader);
    }

    /***** concurrent requests *****/

    /**
     * Run one request per URL concurrently and collect the responses.
     *
     * The easy handles stay available in $this->mh_conn (same indexes as the
     * result) until the next batch, so curlInfo()/showErr() can be called on them.
     *
     * @param array        $urls      Flat list of URLs.
     * @param bool         $revCookie Store each response's cookies in the jar.
     * @param array|string $post      Body sent with every request; empty for GET.
     * @param bool         $headOnly  True to send HEAD requests.
     * @return array List of array("header" => string, "body" => string), in URL order.
     *               Both parts are empty for a transfer that failed.
     * @throws Exception When $urls is not an array or contains an invalid URL.
     */
    private function multiRequest($urls, $revCookie, $post = "", $headOnly = false)
    {
        if (!is_array($urls)) {
            throw new Exception("第一个参数必须为一维数组");
        }
        $urls = $this->url($urls);

        $this->releaseMulti();
        $this->mh = curl_multi_init();

        foreach ($urls as $i => $one) {
            $options = $this->getOptions($one, $post);
            if ($headOnly) {
                $options[CURLOPT_NOBODY] = true;
            }
            $this->mh_conn[$i] = curl_init();
            curl_setopt_array($this->mh_conn[$i], $options);
            curl_multi_add_handle($this->mh, $this->mh_conn[$i]);
        }

        // Drive all transfers until none is active or the multi handle fails.
        $active = null;
        while (true) {
            do {
                $mrc = curl_multi_exec($this->mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);

            if (!$active || $mrc != CURLM_OK) {
                break;
            }
            // -1 means select() could not wait; back off briefly instead of spinning
            // (a full one-second sleep here would stall every iteration).
            if (curl_multi_select($this->mh) == -1) {
                usleep(1000);
            }
        }

        // Reading the completion messages is what copies each transfer's result
        // code onto its easy handle, so curl_errno()/curl_error() work afterwards.
        while (curl_multi_info_read($this->mh) !== false) {
        }

        $res = array();
        foreach ($urls as $i => $one) {
            $handle = $this->mh_conn[$i];
            $raw    = curl_multi_getcontent($handle);

            if ($this->report) {
                $parts = $this->separate($raw, $handle);
                $res[] = array('header' => $parts['header'], 'body' => $parts['body']);
                if ($revCookie) {
                    $this->getCookie($one, $parts['header'], true, $handle);
                }
            } else {
                $res[] = array('header' => '', 'body' => (string) $raw);
            }

            curl_multi_remove_handle($this->mh, $handle);
        }

        return $res;
    }

    /**
     * Concurrent GET requests.
     *
     * @param array $urls      Flat list of URLs.
     * @param bool  $revCookie Store the response cookies in the jar.
     * @return array List of array("header" => string, "body" => string), in URL order.
     * @throws Exception When $urls is not an array or contains an invalid URL.
     */
    public function multi_get($urls, $revCookie = false)
    {
        return $this->multiRequest($urls, $revCookie);
    }

    /**
     * Concurrent HEAD requests.
     *
     * @param array $urls      Flat list of URLs.
     * @param bool  $revCookie Store the response cookies in the jar.
     * @return array List of array("header" => string, "body" => string), in URL order;
     *               "body" is empty for HEAD responses.
     * @throws Exception When $urls is not an array or contains an invalid URL.
     */
    public function multi_head($urls, $revCookie = false)
    {
        return $this->multiRequest($urls, $revCookie, "", true);
    }

    /**
     * Concurrent POST requests; the same body is sent to every URL.
     *
     * @param array        $urls      Flat list of URLs.
     * @param array|string $post      Form fields (url-encoded automatically) or a body string.
     * @param bool         $revCookie Store the response cookies in the jar.
     * @return array List of array("header" => string, "body" => string), in URL order.
     * @throws Exception When $urls is not an array or contains an invalid URL.
     */
    public function multi_post($urls, $post = 0, $revCookie = false)
    {
        return $this->multiRequest($urls, $revCookie, $post);
    }

}

weinxin
画麟阁QQ群
这是一个有爱的大家庭,也是东阁唯一的社区,快来和大家一起闲聊、讨论吧!
谷雨

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: