// PHP 爬图脚本 (PHP image-scraping script)
// Posted on 2014-11-10 | In PHP
//
// The post consists of three separate files: detect.php, list.php and
// awesome.php. Each section below is the content of one of them (the PHP
// opening tag is omitted, as in the original listing).

// ---------------------------------------------------------------------------
// detect.php
// ---------------------------------------------------------------------------

/**
 * Downloads the images referenced by one page, or by a list of pages.
 *
 * Pages are fetched through the HTTP proxy configured in $aContext, converted
 * from GBK to UTF-8, and scanned with two regular expressions: one for the
 * page title and one for <input type='image' src='...'> elements.
 */
class IImage
{
    /** @var string|array A single page URL, or an array of page URLs. */
    private $url;

    /** @var array Stream context options used when fetching pages. */
    private $aContext = array(
        'http' => array(
            // 'proxy' => 'tcp://192.168.1.105:2516',
            'proxy' => 'tcp://127.0.0.1:80',
            'request_fulluri' => true,
        ),
    );

    /** @var string Captures the text after a leading "[...]" tag in <title>, up to two whitespace characters. */
    private $pattern_title = "/<title>\[.*?\](.*?)\s\s.*?<\/title>/";

    /** @var string Captures the src attribute of single-quoted image inputs. */
    private $pattern_images = "/<input\stype=\'image\'\ssrc=\'(.*?)\'.*?>/";

    /** @var string|array|null Fetched page content, kept so each page is downloaded only once. */
    private $contentCache = null;

    /**
     * @param string|array $url A page URL, or an array of page URLs.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Fetches one page through the configured proxy and converts it to UTF-8.
     *
     * @param string $url Page URL.
     * @return string Page content in UTF-8, or '' when the download or the
     *                charset conversion fails.
     */
    private function fetchPage($url)
    {
        $cxContext = stream_context_create($this->aContext);
        $raw = file_get_contents($url, false, $cxContext);
        if ($raw === false) {
            return '';
        }

        $converted = iconv('gbk', 'utf-8', $raw);

        return ($converted === false) ? '' : $converted;
    }

    /**
     * Returns the content of the configured page(s), fetching on first use.
     *
     * @return string|array One string for a single URL; a 0-based list of
     *                      strings (in URL order) when $url is an array.
     */
    private function getContent()
    {
        if ($this->contentCache !== null) {
            return $this->contentCache;
        }

        if (is_array($this->url)) {
            $pages = array();
            foreach ($this->url as $pageUrl) {
                $pages[] = $this->fetchPage($pageUrl);
            }
            $this->contentCache = $pages;
        } else {
            $this->contentCache = $this->fetchPage($this->url);
        }

        return $this->contentCache;
    }

    /**
     * Extracts the title from one page.
     *
     * @param string $content Page content.
     * @return string|null First capture of $pattern_title, or null if the
     *                     pattern does not match.
     */
    private function matchTitle($content)
    {
        if (preg_match($this->pattern_title, $content, $match)) {
            return $match[1];
        }

        return null;
    }

    /**
     * Extracts all image URLs from one page.
     *
     * @param string $content Page content.
     * @return array List of captured src values (possibly empty).
     */
    private function matchImages($content)
    {
        preg_match_all($this->pattern_images, $content, $match);

        return isset($match[1]) ? $match[1] : array();
    }

    /**
     * Returns the title of the configured page(s).
     *
     * @return string|null|array A title (null when none was found) for a
     *                           single URL; a 0-based list of such values
     *                           when $url is an array.
     */
    public function getTitle()
    {
        if (!is_array($this->url)) {
            return $this->matchTitle($this->getContent());
        }

        $titles = array();
        foreach ($this->getContent() as $content) {
            $titles[] = $this->matchTitle($content);
        }

        return $titles;
    }

    /**
     * Returns the image URLs found on the configured page(s).
     *
     * @return array A list of URLs for a single page; a 0-based list of such
     *               lists (aligned with getTitle()) when $url is an array.
     */
    private function getImages()
    {
        if (!is_array($this->url)) {
            return $this->matchImages($this->getContent());
        }

        $imageLists = array();
        foreach ($this->getContent() as $content) {
            $imageLists[] = $this->matchImages($content);
        }

        return $imageLists;
    }

    /**
     * Creates a directory (and missing parents) unless it already exists.
     *
     * @param string $path Directory path.
     * @return bool True when the directory exists afterwards.
     */
    private function ensureDirectory($path)
    {
        if (is_dir($path)) {
            return true;
        }

        // The second is_dir() covers the directory appearing between the
        // first check and the mkdir() call.
        if (!mkdir($path, 0777, true) && !is_dir($path)) {
            echo "Failed to create directory " . $path . PHP_EOL;
            return false;
        }

        echo "Create directory " . $path . PHP_EOL;

        return true;
    }

    /**
     * Turns a scraped page title into a single, safe directory name.
     *
     * The title comes from a remote page, so path separators, characters that
     * are invalid in file names and control characters are replaced, and
     * names that collapse to nothing (including "." and "..") fall back to
     * "noTitle".
     *
     * @param string|null $title Scraped title.
     * @return string Directory name without any path separator.
     */
    private function directoryNameFor($title)
    {
        if (!$title) {
            return 'noTitle';
        }

        $safe = (string) preg_replace('/[\/\\\\:*?"<>|\x00-\x1F]/', '_', $title);
        $safe = rtrim(trim($safe), '. ');

        return ($safe === '') ? 'noTitle' : $safe;
    }

    /**
     * Downloads every distinct image URL into a directory.
     *
     * Files are named "<index>.<extension>", where index is the position of
     * the URL in the list before duplicates were removed; files that already
     * exist are left untouched.
     *
     * @param array  $images   Image URLs.
     * @param string $imageDir Existing target directory.
     * @return void
     */
    private function downloadImages($images, $imageDir)
    {
        foreach (array_unique($images) as $position => $image) {
            $imageName = $imageDir . "/" . $position . "." . pathinfo($image, PATHINFO_EXTENSION);
            if (file_exists($imageName)) {
                continue;
            }

            $imageStream = file_get_contents($image);
            if ($imageStream === false || $imageStream === '') {
                continue;
            }

            if (file_put_contents($imageName, $imageStream) !== false) {
                echo $imageName . " downloaded" . PHP_EOL;
            }
        }
    }

    /**
     * Downloads all images of the configured page(s) below $dir.
     *
     * Layout: $dir/<month>-<day>-<year>/<page title or "noTitle">/<n>.<ext>
     * Progress messages are echoed to standard output.
     *
     * @param string $dir Root directory for downloads; created if missing.
     * @return void
     */
    public function saveImages($dir)
    {
        echo "Starting..." . PHP_EOL;
        if (!$this->ensureDirectory($dir)) {
            return;
        }

        // Create date directory, e.g. "11-10-2014" (no leading zeros).
        $dateDir = $dir . '/' . date('n-j-Y');
        if (!$this->ensureDirectory($dateDir)) {
            return;
        }

        if (is_array($this->url)) {
            // Resolve both lists once, up front; they share the same indexes.
            $titles = $this->getTitle();
            $imageLists = $this->getImages();

            foreach ($titles as $index => $title) {
                $imageDir = $dateDir . '/' . $this->directoryNameFor($title);
                if (!$this->ensureDirectory($imageDir)) {
                    continue;
                }

                echo "Downloading " . $title . PHP_EOL;
                $images = isset($imageLists[$index]) ? $imageLists[$index] : array();
                $this->downloadImages($images, $imageDir);
            }
            echo "Download complete" . PHP_EOL;

            return;
        }

        $imageDir = $dateDir . '/' . $this->directoryNameFor($this->getTitle());
        if (!$this->ensureDirectory($imageDir)) {
            return;
        }

        echo "Downloading..." . PHP_EOL;
        $this->downloadImages($this->getImages(), $imageDir);
        echo "Download complete" . PHP_EOL;
    }
}

// ---------------------------------------------------------------------------
// list.php
// ---------------------------------------------------------------------------

/**
 * Reads an RSS feed through an HTTP proxy and lists the links of its items.
 */
class IList
{
    /** @var string Feed URL. */
    private $url;

    /** @var array Stream context options used when fetching the feed. */
    private $aContext = array(
        'http' => array(
            'proxy' => 'tcp://192.168.1.105:2516',
            'request_fulluri' => true,
        ),
    );

    // The two patterns below are not referenced by any code shown here.
    private $list_link_pattern = "/<tr.*?class=\"tr3.*?><td><a.*?href=\"(.*?)\"/";
    private $list_link_pattern1 = "/<title>.*?<\/title>/";

    /**
     * @param string $url Feed URL.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Downloads the feed through the configured proxy.
     *
     * @return string|false Raw feed body, or false when the download fails.
     */
    private function getContent()
    {
        $cxContext = stream_context_create($this->aContext);
        $content = file_get_contents($this->url, false, $cxContext);

        return $content;
    }

    /**
     * Parses the feed and collects the <link> of each <item> in <channel>.
     *
     * The first item is skipped. The original code tried to do this by
     * comparing the foreach key with 0, which never matched because iterating
     * SimpleXML children yields the tag name as key; a counter is used instead.
     * NOTE(review): confirm that dropping the first feed item is wanted.
     *
     * @return array List of link strings; empty when the feed cannot be
     *               fetched or parsed.
     */
    private function resolveXMLtoList()
    {
        $list = array();

        $content = $this->getContent();
        if ($content === false) {
            return $list;
        }

        $xml = simplexml_load_string(trim($content));
        if ($xml === false || !isset($xml->channel->item)) {
            return $list;
        }

        $index = 0;
        foreach ($xml->channel->item as $item) {
            if ($index++ === 0) {
                continue;
            }
            $list[] = (string) $item->link;
        }

        return $list;
    }

    /**
     * Returns the item links of the feed.
     *
     * @return array List of link strings (possibly empty).
     */
    public function getList()
    {
        return $this->resolveXMLtoList();
    }
}

// ---------------------------------------------------------------------------
// awesome.php
// ---------------------------------------------------------------------------

include_once('list.php');
include_once('detect.php');

$rss = "http://t66y.com/rss.php?fid=16";
$url = "http://wo.yao.cl/htm_data/16/1411/1274506.html";

$L = new IList($rss);
// $list = $L->getList();
$I = new IImage($url);
$I->saveImages('../img');

/**
 * Moves the image files lying directly in $dir into ../img/noName.
 *
 * A file counts as an image when its extension matches jpg, png or gif.
 * The destination directory is created when it does not exist.
 *
 * @param string $dir Directory to scan (not recursive).
 * @return void
 */
function moveNaked($dir = 'img')
{
    echo "Moving..." . PHP_EOL;

    $targetDir = '../img/noName';
    if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true) && !is_dir($targetDir)) {
        echo "Failed to create directory " . $targetDir . PHP_EOL;
        return;
    }
    $targetDir = realpath($targetDir);

    $files = new DirectoryIterator($dir);
    $imagePattern = "/(jpg|png|gif)/";
    foreach ($files as $fileinfo) {
        if (!$fileinfo->isFile() || !preg_match($imagePattern, $fileinfo->getExtension())) {
            continue;
        }

        $oldname = $fileinfo->getPath() . "/" . $fileinfo->getFilename();
        $newname = $targetDir . "/" . $fileinfo->getFilename();
        if (rename($oldname, $newname)) {
            echo $fileinfo->getFilename() . " has moved to " . $targetDir . "/" . PHP_EOL;
        } else {
            echo "Failed to move " . $fileinfo->getFilename() . PHP_EOL;
        }
    }

    echo "Move completed." . PHP_EOL;
}