<?php

/*
	# EXAMPLE:

	$c = new WPI_Curly;
	
	$c->add( array( 'url' => 'http://www.google.com', 'opt' => array('CURLOPT_USERAGENT' => 'Awesome Bot 1.0') ) );
	$c->add( 'http://news.yahoo.com/' );
	
	$html = $c->fetch();
	print_r($html);
*/



/**
 * Fetches several urls in parallel through the cURL multi interface.
 *
 * Usage: queue requests with add(), then call fetch(). fetch() returns the
 * queued entries with two extra keys per entry: 'html' (the response body as
 * returned by curl_multi_getcontent) and 'http_code' (CURLINFO_HTTP_CODE).
 *
 * Unless a request overrides them through its 'opt' array, every request gets
 * browser-like headers, a randomised user agent, gzip/deflate decoding,
 * redirect following and a 15 second timeout.
 */
class WPI_Curly {


	// Request time tracking, as microtime(true) floats.
	// 'start' is stamped when the object is constructed; 'end' and 'taken'
	// (end - start, in seconds) are filled in at the end of every fetch().
	public $time = array( 'start' => 0, 'end' => 0, 'taken' => 0 );


	// User adds urls and curlopts here (normally through add()).
	// Each entry is array( 'url' => string, 'opt' => array( 'CURLOPT_NAME' => value, ... ) ).
	// fetch() adds 'html' and 'http_code' elements to each entry.
	public $data = array();


	// Can be 'always', 'never', or 'fallback'.
	// For anything other than 'never' proxies must be entered into $this->proxies.
	// 'fallback' tries to directly grab the given url. Only if it fails (timeout, empty content, error) will proxies be used.
	// NOTE(review): no method of this class reads $proxy_usage or $proxies; the behaviour described above is not implemented here.
	public $proxy_usage = 'never';


	// Put proxies here.
	public $proxies = array();


	// When true (the default) cURL's own certificate checks are left untouched.
	// Set to false to skip peer and host verification for https urls whose 'opt'
	// array does not set CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST itself.
	public $ssl_verify = true;


	// Easy handles for the fetch() in progress, keyed like $this->data.
	protected $ch = array();


	// Multi handle for the fetch() in progress, null otherwise.
	protected $mh = null;


	// Nice mix of UAs to look more human.
	// But beware - websites sometimes return different content for different UAs, so multiple robust regexes are needed.
	// Currently: 5 * 3 * 4 = 60 UA variations.
	private $user_agents = array(

		'strings' => array(
			'Mozilla/5.0 ({PLATFORM}; {CPU}; rv:8.0) Gecko/20100101 Firefox/8.0',
			'Mozilla/5.0 ({PLATFORM}; {CPU}) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
			'Mozilla/5.0 (compatible; MSIE 9.0; {PLATFORM}; {CPU}; Trident/5.0)',
			'Opera/9.80 ({PLATFORM}; U; en) Presto/2.9.168 Version/11.51',
			'Mozilla/5.0 ({PLATFORM}; {CPU}) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22'
		),

		'cpu' => array(
			'Win64; IA64',
			'Win64; x64',
			'WOW64'
		),

		'platform' => array(
			'Windows NT 6.1',	// Windows 7
			'Windows NT 6.0',	// Windows Vista
			'Windows NT 5.2',	// Windows Server 2003; Windows XP x64 Edition
			'Windows NT 5.1',	// Windows XP
		)

	);


	// Default request headers, sent unless a request supplies its own CURLOPT_HTTPHEADER.
	private $browser_header = array(
		'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
		'Cache-Control: max-age=0',
		'Connection: keep-alive',
		'Keep-Alive: 300',
		'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
		'Accept-Language: en-us,en;q=0.5',
		'Pragma: '
	);


	/**
	 * Stamps the start time used for $this->time['taken'].
	 */
	public function __construct()
	{

		// clock in
		$this->time['start'] = microtime(true);

	}


	/**
	 * Runs every queued request in parallel and returns the results.
	 *
	 * @return array $this->data, each entry extended with 'html' and 'http_code'.
	 */
	public function fetch()
	{

		$this->init();
		$this->setopt();

		$this->multi_init();
		$this->multi_add_handles();
		$this->multi_exec();
		$this->multi_getcontent();
		$this->multi_close();

		// 'taken' is measured from construction, not from the start of this call
		$this->time['end'] = microtime(true);
		$this->time['taken'] = $this->time['end'] - $this->time['start'];

		return $this->data;

	}


	/**
	 * Queues one request.
	 *
	 * @param string|array $array Either a url string containing 'http', or an array
	 *                            with a 'url' element and an optional 'opt' array of
	 *                            cURL options keyed by constant name
	 *                            (e.g. 'CURLOPT_USERAGENT') or by the constant itself.
	 * @return bool true when the request was queued, false when the input was rejected.
	 */
	public function add($array)
	{

		// a bare url string becomes a request without custom options
		if( is_string($array) && strstr($array, 'http') )
			$array = array( 'url' => $array, 'opt' => array() );

		if( !is_array($array) || !isset($array['url']) )
			return false;

		// a missing or malformed option list means "defaults only"
		if( !isset($array['opt']) || !is_array($array['opt']) )
			$array['opt'] = array();

		$this->data[] = $array;

		return true;

	}


	/**
	 * Creates one easy handle per queued request, keyed like $this->data.
	 */
	protected function init()
	{

		$this->ch = array();

		foreach( array_keys($this->data) as $i )
			$this->ch[$i] = curl_init();

	}


	/**
	 * Applies the default options, then the request's own options, to every handle.
	 *
	 * A default is skipped when the request's 'opt' array contains that option
	 * under its constant name. User options are applied last, so they always win.
	 */
	protected function setopt()
	{

		foreach( $this->data as $i => $request ) {

			$handle = $this->ch[$i];
			$opt = ( isset($request['opt']) && is_array($request['opt']) ) ? $request['opt'] : array();

			curl_setopt( $handle, CURLOPT_URL, $request['url'] );

			// set standard options if they aren't already chosen
			if( !isset($opt['CURLOPT_HTTPHEADER']) )
				curl_setopt( $handle, CURLOPT_HTTPHEADER, $this->browser_header );

			if( !isset($opt['CURLOPT_ENCODING']) )
				curl_setopt( $handle, CURLOPT_ENCODING, 'gzip,deflate' );

			if( !isset($opt['CURLOPT_RETURNTRANSFER']) )
				curl_setopt( $handle, CURLOPT_RETURNTRANSFER, 1 );

			if( !isset($opt['CURLOPT_AUTOREFERER']) )
				curl_setopt( $handle, CURLOPT_AUTOREFERER, 1 );

			if( !isset($opt['CURLOPT_TIMEOUT']) )
				curl_setopt( $handle, CURLOPT_TIMEOUT, 15 );

			if( !isset($opt['CURLOPT_HEADER']) )
				curl_setopt( $handle, CURLOPT_HEADER, 0 );

			if( !isset($opt['CURLOPT_FOLLOWLOCATION']) )
				@curl_setopt( $handle, CURLOPT_FOLLOWLOCATION, 1 ); // silenced to remove the warning when safe_mode or open_basedir are on

			if( !isset($opt['CURLOPT_USERAGENT']) )
				curl_setopt( $handle, CURLOPT_USERAGENT, $this->rand_ua() );

			// Relaxed certificate checks are opt-in. The url is the haystack here:
			// with the arguments the other way round the test can never match a real url.
			if( !$this->ssl_verify && stripos( (string) $request['url'], 'https://' ) === 0 ) {

				if( !isset($opt['CURLOPT_SSL_VERIFYPEER']) )
					curl_setopt( $handle, CURLOPT_SSL_VERIFYPEER, false );

				if( !isset($opt['CURLOPT_SSL_VERIFYHOST']) )
					curl_setopt( $handle, CURLOPT_SSL_VERIFYHOST, 0 );

			}

			// set any user specified options
			foreach( $opt as $name => $val ) {

				$option = $this->resolve_option( $name );

				if( $option === null ) {
					trigger_error( 'WPI_Curly: unknown cURL option "' . $name . '" ignored', E_USER_WARNING );
					continue;
				}

				if( is_array($val) )
					@curl_setopt( $handle, $option, $val ); // silence so that multi arrays go through ok
				else
					curl_setopt( $handle, $option, $val );

			}

		}

	}


	/**
	 * Creates the multi handle that drives the parallel transfer.
	 */
	protected function multi_init()
	{

		$this->mh = curl_multi_init();

	}


	/**
	 * Registers every easy handle with the multi handle.
	 */
	protected function multi_add_handles()
	{

		foreach( $this->ch as $handle )
			curl_multi_add_handle( $this->mh, $handle );

	}


	/**
	 * Drives the transfers until none is running or the multi stack reports an error.
	 */
	protected function multi_exec()
	{

		$running = null;

		do {

			$status = curl_multi_exec( $this->mh, $running );

			// Block until a socket has activity instead of spinning.
			// curl_multi_select() returns -1 when it cannot wait; nap briefly
			// in that case to be kind to the cpu.
			if( $status === CURLM_OK && $running > 0 && curl_multi_select( $this->mh, 1.0 ) === -1 )
				usleep(1000);

			// CURLM_CALL_MULTI_PERFORM asks for an immediate repeat call; any other non-OK status is an error
		} while( $running > 0 && ( $status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM ) );

	}


	/**
	 * Copies each response body and HTTP status code into $this->data.
	 */
	protected function multi_getcontent()
	{

		foreach( $this->ch as $i => $handle ) {
			$this->data[$i]['html'] = curl_multi_getcontent( $handle );
			$this->data[$i]['http_code'] = curl_getinfo( $handle, CURLINFO_HTTP_CODE );
		}

	}


	/**
	 * Detaches the easy handles, closes the multi handle and releases both.
	 */
	protected function multi_close()
	{

		foreach( $this->ch as $handle )
			curl_multi_remove_handle( $this->mh, $handle );

		curl_multi_close( $this->mh );

		// dropping the last references lets PHP free the handles
		$this->ch = array();
		$this->mh = null;

	}


	/**
	 * Builds a user agent from a random template, CPU and platform.
	 *
	 * @return string User agent with the {CPU} and {PLATFORM} placeholders filled in.
	 */
	protected function rand_ua()
	{

		// choose a UA string
		$rand_string_key = rand( 0, count($this->user_agents['strings']) - 1 );
		$ua = $this->user_agents['strings'][$rand_string_key];

		// get a rand CPU value
		$rand_cpu_key = rand( 0, count($this->user_agents['cpu']) - 1 );
		$ua = str_replace( '{CPU}', $this->user_agents['cpu'][$rand_cpu_key], $ua );

		// get a rand PLATFORM value
		$rand_platform_key = rand( 0, count($this->user_agents['platform']) - 1 );
		$ua = str_replace( '{PLATFORM}', $this->user_agents['platform'][$rand_platform_key], $ua );

		return $ua;

	}


	/**
	 * Turns a key of a request's 'opt' array into a cURL option number.
	 *
	 * @param int|string $name An option constant's value, or its name (e.g. 'CURLOPT_TIMEOUT').
	 * @return int|null The option number, or null when the name is not a defined integer constant.
	 */
	private function resolve_option( $name )
	{

		if( is_int($name) )
			return $name;

		$value = ( is_string($name) && defined($name) ) ? constant($name) : null;

		return is_int($value) ? $value : null;

	}


}

