This commit is contained in:
xiaomlove
2020-12-26 01:42:23 +08:00
commit a3ba82be64
949 changed files with 60612 additions and 0 deletions

View File

@@ -0,0 +1,270 @@
<?php
/***************************************************************************
Browser Emulating file functions v2.0
(c) Kai Blankenhorn
www.bitfolge.de/en
kaib@bitfolge.de
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
****************************************************************************
Changelog:
v2.0 03-09-03
added a wrapper class; this has the advantage that you no longer need
to specify a lot of parameters, just call the methods to set
each option
added option to use a special port number, may be given by setPort or
as part of the URL (e.g. server.com:80)
added getLastResponseHeaders()
v1.5
added Basic HTTP user authorization
minor optimizations
v1.0
initial release
***************************************************************************/
/**
* BrowserEmulator class. Provides methods for opening urls and emulating
* a web browser request.
**/
class BrowserEmulator {
    /** Request header fields set by the caller, as name => value. */
    var $headerLines = Array ();
    /** POST parameters as name => value; a non-empty list makes the request a POST. */
    var $postData = Array ();
    /** User name for HTTP Basic authentication. */
    var $authUser = "";
    /** Password for HTTP Basic authentication. */
    var $authPass = "";
    /** TCP port the next request connects to. */
    var $port;
    /** Status line and header lines of the most recent response, one entry per line. */
    var $lastResponse = Array ();

    /**
    * Installs the default header lines and the default port.
    *
    * The set-up lives in __construct() because PHP 8 no longer treats a
    * method named after its class as the constructor; without this the
    * port and the User-Agent header would never be initialised.
    **/
    function __construct () {
        $this->resetHeaderLines ();
        $this->resetPort ();
    }

    /**
    * PHP 4 style constructor name, kept so that existing explicit calls to
    * BrowserEmulator() keep working. It simply re-runs the initialisation.
    **/
    function BrowserEmulator () {
        $this->__construct ();
    }

    /**
    * Adds a single header field to the HTTP request header. The resulting header
    * line will have the format
    * $name: $value\r\n
    * Adding the same $name twice replaces the earlier value.
    **/
    function addHeaderLine ($name, $value) {
        $this->headerLines[$name] = $value;
    }

    /**
    * Deletes all custom header lines. This will not remove the User-Agent header field,
    * which is necessary for correct operation.
    **/
    function resetHeaderLines () {
        $this->headerLines = Array ();
        /*******************************************************************************/
        /**************   YOU MAY SET THE USER AGENT STRING HERE   *******************/
        /*                                                                             */
        /* default is "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",            */
        /* which means Internet Explorer 6.0 on WinXP                                  */
        $this->headerLines["User-Agent"] =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
        /*******************************************************************************/
    }

    /**
    * Add a post parameter. Post parameters are sent in the body of an HTTP POST request.
    * Name and value are written into the body verbatim, so the caller has to
    * URL-encode them where necessary.
    **/
    function addPostData ($name, $value) {
        $this->postData[$name] = $value;
    }

    /**
    * Deletes all custom post parameters.
    **/
    function resetPostData () {
        $this->postData = Array ();
    }

    /**
    * Sets an auth user and password to use for the request.
    * Set both as empty strings to disable authentication.
    **/
    function setAuth ($user, $pass) {
        $this->authUser = $user;
        $this->authPass = $pass;
    }

    /**
    * Selects a custom port to use for the request.
    **/
    function setPort ($portNumber) {
        $this->port = $portNumber;
    }

    /**
    * Resets the port used for request to the HTTP default (80).
    **/
    function resetPort () {
        $this->port = 80;
    }

    /**
    * Builds the complete text of the HTTP/1.0 request (request line, header
    * lines, blank line and, for a POST, the body) from the current settings.
    *
    * Host, Authorization, Content-Type and Content-Length are added to a
    * per-request copy of the header list, so they never leak into
    * $this->headerLines and therefore never into a later request.
    *
    * @param string $server host name, sent as the Host header
    * @param string $path   request target, e.g. "/index.html?x=1"
    * @return string the raw request text
    **/
    function buildRequest ($server, $path) {
        $headers = $this->headerLines;
        $headers["Host"] = $server;
        if ($this->authUser != "" AND $this->authPass != "") {
            $headers["Authorization"] =
                "Basic ".base64_encode ($this->authUser.":".$this->authPass);
        }

        $body = "";
        if (count ($this->postData) > 0) {
            $pairs = Array ();
            foreach ($this->postData AS $key => $value) {
                $pairs[] = "$key=$value";
            }
            $body = join ("&", $pairs);
            // Without a Content-Type the receiving side has no way to know
            // the body is form data; a caller-supplied value wins.
            if (!isset ($headers["Content-Type"])) {
                $headers["Content-Type"] = "application/x-www-form-urlencoded";
            }
            $headers["Content-Length"] = strlen ($body);
            $request = "POST $path HTTP/1.0\r\n";
        }
        else {
            $request = "GET $path HTTP/1.0\r\n";
        }

        foreach ($headers AS $key => $value) {
            $request .= "$key: $value\r\n";
        }
        // The blank line ends the header section; the body follows with no
        // trailing CRLF so that it is exactly Content-Length bytes long.
        return $request."\r\n".$body;
    }

    /**
    * Make an fopen call to $url with the parameters set by previous member
    * method calls. Send all set headers, post data and user authentication data.
    * Returns a file handle on success, or false on failure.
    *
    * The handle is positioned just behind the response headers; the headers
    * themselves are available through getLastResponseHeaders(). A port given
    * in the URL is stored with setPort() and so also applies to later calls.
    * The scheme part of the URL is parsed but not used: the connection is
    * always a plain socket to the selected port.
    **/
    function fopen ($url) {
        $this->lastResponse = Array ();
        $matches = Array ();
        preg_match ("~([a-z]*://)?([^:/]*)(:([0-9]{1,5}))?(/.*)?~i", $url,
                    $matches);
        // Trailing groups that did not take part in the match are missing
        // from $matches, so every group is read defensively.
        $server = isset ($matches[2]) ? $matches[2] : "";
        $port = isset ($matches[4]) ? $matches[4] : "";
        $path = isset ($matches[5]) ? $matches[5] : "";
        if ($port != "") {
            $this->setPort ((int) $port);
        }
        if ($path == "") {
            $path = "/";
        }
        if ($server == "") {
            return false;
        }

        $socket = fsockopen ($server, $this->port);
        if (!$socket) {
            return false;
        }
        fputs ($socket, $this->buildRequest ($server, $path));

        // Status line, e.g. "HTTP/1.0 200 OK"; the code is at offset 9.
        $line = fgets ($socket, 1000);
        if ($line === false) {
            // The peer closed the connection without answering.
            fclose ($socket);
            return false;
        }
        $this->lastResponse[] = $line;
        $status = substr ($line, 9, 3);

        // Consume header lines up to the blank separator line (or EOF).
        while (($line = fgets ($socket, 1000)) !== false AND trim ($line) != "") {
            $this->lastResponse[] = $line;
            if ($status == "401" AND strpos ($line, "WWW-Authenticate: Basic realm=\"") === 0) {
                // Basic authentication was demanded and not satisfied.
                fclose ($socket);
                return FALSE;
            }
        }
        return $socket;
    }

    /**
    * Make a file call to $url with the parameters set by previous member
    * method calls. Send all set headers, post data and user authentication data.
    * Returns the requested file as an array on success, or false on failure.
    *
    * Each array element is one line of the response body, line ending
    * included. The connection is closed before returning.
    **/
    function file ($url) {
        $socket = $this->fopen ($url);
        if (!$socket) {
            return FALSE;
        }
        $file = Array ();
        while (!feof ($socket)) {
            $line = fgets ($socket, 10000);
            if ($line === false) {
                // feof() only turns true after a read hit EOF; do not store
                // that failed read as a line.
                break;
            }
            $file[] = $line;
        }
        fclose ($socket);
        return $file;
    }

    /**
    * Returns the status line and header lines received by the most recent
    * fopen()/file() call, as an array of raw lines.
    **/
    function getLastResponseHeaders () {
        return $this->lastResponse;
    }
}
// example code
/*
$be = new BrowserEmulator();
//$be->addHeaderLine("Referer", "http://previous.server.com/");
//$be->addHeaderLine("Accept-Encoding", "x-compress; x-zip");
//$be->addPostData("Submit", "OK");
//$be->addPostData("item", "42");
//$be->setAuth("admin", "secretpass");
// also possible:
// $be->setPort(10080);
$file = $be->fopen("http://us.imdb.com/Title?0209144");
$response = $be->getLastResponseHeaders();
while ($line = fgets($file, 1024)) {
// do something with the file
echo $line;
}
fclose($file);
*/
?>

View File

@@ -0,0 +1,114 @@
<?php
class info_extractor{
    /**
     * Nothing to set up: the class has no properties and only groups string helpers.
     */
    function __construct(){}

    /**
     * PHP 4 style constructor name, kept as a no-op so that explicit calls
     * to info_extractor() keep working.
     */
    function info_extractor(){}

    /** cut the text between two needles out of a string
     * @method truncate
     * @param string src(source string), string s_str(starting needle), string e_str(ending needle, "" for open end, i.e. everything after s_str), integer e_offset(accepted for backward compatibility but not used: the search for e_str always starts right behind the first s_str)
     * @return string the text between the first s_str and the next e_str, or "" when s_str or a requested e_str is not found
     */
    function truncate($src, $s_str, $e_str = "", $e_offset = 0)
    {
        // everything from the first occurrence of the starting needle onwards
        $ret = strstr($src, $s_str);
        if($ret === false)
            return "";
        $s_len = strlen($s_str);
        if($e_str == "")
        {
            // open end: there is no end position, so take the whole tail
            return (string) substr($ret, $s_len);
        }
        $endpos = strpos($ret, $e_str, $s_len);
        if($endpos === false)
            return "";
        return (string) substr($ret, $s_len, $endpos - $s_len);
    }

    /** find a certain pattern in a given string and pick capture groups from it
     * @method find_pattern
     * @param string src(source string), string regex(regular expression including delimiters), boolean multiple(true to collect every occurrence, false for the first one only), array res_where_array(capture group numbers to extract, in the order they should appear in the result)
     * @return mixed false when the pattern does not match; otherwise, for multiple == false, a list holding the requested groups of the first match, and for multiple == true, a list with one such list per match. A requested group that did not take part in a match yields null.
     */
    function find_pattern($src, $regex, $multiple, $res_where_array)
    {
        $res_array = array();
        if($multiple == true)
        {
            if(!preg_match_all($regex,$src,$info_block,PREG_SET_ORDER))
                return false;
            // PREG_SET_ORDER yields one entry per match, indexed from 0
            foreach($info_block as $block_index => $info)
            {
                foreach ($res_where_array as $group)
                    $res_array[$block_index][] = isset($info[$group]) ? $info[$group] : null;
            }
            return $res_array;
        }

        if(!preg_match($regex,$src,$info))
            return false;
        foreach ($res_where_array as $group)
            $res_array[] = isset($info[$group]) ? $info[$group] : null;
        return $res_array;
    }

    /** remove a given pattern from a given string
     * @method remove
     * @param string src(source string), string regex_s(starting needle, a regex fragment without delimiters), string regex_e(ending needle, a regex fragment without delimiters), integer max(maximum number of occurrences to process), boolean all(true removes the needles together with the text between them, false removes only the needles and keeps the text between them)
     * @return string processed string; the unchanged source string when the regular expression fails
     */
    function remove($src, $regex_s, $regex_e, $max = 100, $all = false)
    {
        // [\s\S] matches any character including newlines, like (\s|.) does,
        // but without an alternation inside the repeat, which is far cheaper
        // for the regex engine on long input
        $ret = preg_replace("/" . $regex_s . '([\s\S]+?)' . $regex_e . "/i", ($all == false ? "\\1" : ""), $src, $max);
        if($ret === null)
            return $src;
        return $ret;
    }

    /** trim whitespace and a given list of patterns from both ends of a string
     * @method trim_str
     * @param string src(source string), array regex_trim_array(regex fragments without delimiters to be trimmed from both ends), integer safe_counter(maximum number of passes over the list)
     * @return string processed string
     */
    function trim_str($src, $regex_trim_array, $safe_counter =10)
    {
        // result for the cases where no pattern gets applied at all
        $ret = trim($src);
        while($safe_counter>0)
        {
            $safe_counter--;
            $changed = false;
            foreach($regex_trim_array as $regex_trim_array_each)
            {
                $trimmed = preg_replace("/^((" . $regex_trim_array_each . ")*)" . '([\s\S]+?)' . "((" . $regex_trim_array_each . ")*)$/i","\\3", trim($src), 1);
                if($trimmed === null)
                    continue; // regex failure: keep what we have so far
                $ret = $trimmed;
                if($ret !== $src)
                    $changed = true;
                $src = $ret;
            }
            // a full pass that changed nothing means there is nothing left to trim
            if(!$changed)
                break;
        }
        return $ret;
    }
}
?>