Justin's Words

PHP 爬图脚本

detect.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
 * Scrapes one or more pages for their title and their <input type='image'>
 * sources, and saves the referenced images to disk.
 *
 * The constructor accepts either a single page URL or an array of page URLs.
 * getTitle() mirrors that shape: a string (or null when nothing matched) for a
 * single URL, a 0-indexed array of such values for an array of URLs.
 *
 * Pages are converted from GBK to UTF-8 before the patterns are applied.
 */
class IImage
{
    /** @var string|array Page URL, or list of page URLs. */
    private $url;

    /** @var array Stream context options used when fetching the pages. */
    private $aContext = array(
        'http' => array(
            // 'proxy' => 'tcp://192.168.1.105:2516',
            'proxy' => 'tcp://127.0.0.1:80',
            'request_fulluri' => true,
        ),
    );

    /** @var string Captures the text between a leading "[...]" tag and the first double whitespace of <title>. */
    private $pattern_title = "/<title>\[.*?\](.*?)\s\s.*?<\/title>/";

    /** @var string Captures the src attribute of single-quoted <input type='image'> elements. */
    private $pattern_images = "/<input\stype=\'image\'\ssrc=\'(.*?)\'.*?>/";

    /** @var string|array|null Fetched page content, kept so every page is requested only once. */
    private $content = null;

    /** @var bool Whether $content has been populated. */
    private $contentLoaded = false;

    /**
     * @param string|array $url      Page URL or list of page URLs.
     * @param array|null   $aContext Optional stream context options replacing
     *                               the built-in proxy configuration.
     */
    public function __construct($url, $aContext = null) {
        $this->url = $url;
        if (is_array($aContext)) {
            $this->aContext = $aContext;
        }
    }

    /**
     * Fetches a single page and converts it from GBK to UTF-8.
     *
     * @param string $url
     * @return string Converted page, or '' when the fetch or conversion failed.
     */
    private function fetchPage($url) {
        $cxContext = stream_context_create($this->aContext);
        $raw = file_get_contents($url, false, $cxContext);
        if ($raw === false) {
            return '';
        }
        $converted = iconv('gbk', 'utf-8', $raw);
        return $converted === false ? '' : $converted;
    }

    /**
     * Returns the page content, fetching it on first use only.
     *
     * @return string|array One string for a single URL, a 0-indexed array of
     *                      strings for an array of URLs.
     */
    private function getContent() {
        if ($this->contentLoaded) {
            return $this->content;
        }

        if (is_array($this->url)) {
            $pages = array();
            foreach ($this->url as $url) {
                $pages[] = $this->fetchPage($url);
            }
            $this->content = $pages;
        } else {
            $this->content = $this->fetchPage($this->url);
        }
        $this->contentLoaded = true;

        return $this->content;
    }

    /**
     * Extracts the first title match from one page.
     *
     * @param string $content
     * @return string|null Null when the title pattern does not match.
     */
    private function extractTitle($content) {
        $match = array();
        if (preg_match($this->pattern_title, $content, $match) === 1) {
            return $match[1];
        }
        return null;
    }

    /**
     * Extracts every image source from one page.
     *
     * @param string $content
     * @return array List of image URLs (possibly empty, possibly with duplicates).
     */
    private function extractImages($content) {
        $matches = array();
        preg_match_all($this->pattern_images, $content, $matches);
        return isset($matches[1]) ? $matches[1] : array();
    }

    /**
     * Returns the title(s) of the configured page(s).
     *
     * @return string|null|array
     */
    public function getTitle() {
        $content = $this->getContent();
        if (!is_array($this->url)) {
            return $this->extractTitle($content);
        }

        $titleArr = array();
        foreach ($content as $page) {
            $titleArr[] = $this->extractTitle($page);
        }
        return $titleArr;
    }

    /**
     * Returns the image URLs of the configured page(s).
     *
     * @return array A flat list for a single URL, a list of lists (aligned
     *               with getTitle()) for an array of URLs.
     */
    private function getImages() {
        $content = $this->getContent();
        if (!is_array($this->url)) {
            return $this->extractImages($content);
        }

        $imagesArr = array();
        foreach ($content as $page) {
            $imagesArr[] = $this->extractImages($page);
        }
        return $imagesArr;
    }

    /**
     * Turns a scraped title into a single, safe directory name.
     *
     * The title comes from a remote page, so path separators are neutralised
     * and "." / ".." are rejected to keep downloads below the dated directory.
     *
     * @param string|null $title
     * @return string
     */
    private function directoryNameFor($title) {
        if (!$title) {
            return 'noTitle';
        }
        $name = str_replace(array('/', '\\', "\0"), '_', $title);
        if ($name === '.' || $name === '..') {
            return 'noTitle';
        }
        return $name;
    }

    /**
     * Creates $path (including missing parents) when it does not exist yet.
     *
     * @param string $path
     * @return bool True when the directory exists afterwards.
     */
    private function ensureDirectory($path) {
        if (is_dir($path)) {
            return true;
        }
        // Re-check is_dir() after a failed mkdir() in case the directory
        // appeared in the meantime.
        if (!mkdir($path, 0777, true) && !is_dir($path)) {
            echo "Failed to create directory ".$path."\n\r";
            return false;
        }
        echo "Create directory ".$path."\n\r";
        return true;
    }

    /**
     * Downloads every distinct image into $image_dir, skipping files that
     * already exist. Files are named after the image's index in the page.
     *
     * Images are fetched without the stream context used for pages.
     *
     * @param string $image_dir
     * @param array  $images
     */
    private function downloadInto($image_dir, array $images) {
        // array_unique() preserves keys, so names stay stable across reruns.
        foreach (array_unique($images) as $index => $image) {
            $image_name = $image_dir."/".$index.".".pathinfo($image, PATHINFO_EXTENSION);
            if (file_exists($image_name)) {
                continue;
            }
            $image_stream = file_get_contents($image);
            if ($image_stream === false || $image_stream === '') {
                continue;
            }
            if (file_put_contents($image_name, $image_stream) !== false) {
                echo $image_name." downloaded\n\r";
            }
        }
    }

    /**
     * Saves all images to <dir>/<month>-<day>-<year>/<title>/, using "noTitle"
     * when a page has no usable title. Progress is echoed to standard output.
     *
     * @param string $dir Root directory for downloads; created when missing.
     */
    public function saveImages($dir) {
        echo "Starting...\n\r";

        if (!$this->ensureDirectory($dir)) {
            return;
        }

        // Create date directory
        $today = getdate();
        $datedDir = $dir.'/'.$today['mon'].'-'.$today['mday'].'-'.$today['year'];
        if (!$this->ensureDirectory($datedDir)) {
            return;
        }

        // Resolve titles and images once, up front, instead of per iteration.
        $titles = $this->getTitle();
        $images = $this->getImages();

        if (is_array($this->url)) {
            foreach ($titles as $pageIndex => $title) {
                $image_dir = $datedDir.'/'.$this->directoryNameFor($title);
                if (!$this->ensureDirectory($image_dir)) {
                    continue;
                }
                echo "Downloading ".$title."\n\r";
                $pageImages = isset($images[$pageIndex]) ? $images[$pageIndex] : array();
                $this->downloadInto($image_dir, $pageImages);
            }
        } else {
            $image_dir = $datedDir.'/'.$this->directoryNameFor($titles);
            if (!$this->ensureDirectory($image_dir)) {
                return;
            }
            echo "Downloading...\n\r";
            $this->downloadInto($image_dir, $images);
        }

        echo "Download complete\n\r";
    }
}

list.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/**
 * Reads an RSS feed and exposes the <link> of its items as a list of URLs.
 */
class IList
{
    /** @var string Feed URL. */
    private $url;

    /** @var array Stream context options used when fetching the feed. */
    private $aContext = array(
        'http' => array(
            'proxy' => 'tcp://192.168.1.105:2516',
            'request_fulluri' => true,
        ),
    );

    // Neither pattern is referenced by any method of this class.
    private $list_link_pattern = "/<tr.*?class=\"tr3.*?><td><a.*?href=\"(.*?)\"/";
    private $list_link_pattern1 = "/<title>.*?<\/title>/";

    /**
     * @param string     $url      Feed URL.
     * @param array|null $aContext Optional stream context options replacing
     *                             the built-in proxy configuration.
     */
    public function __construct($url, $aContext = null) {
        $this->url = $url;
        if (is_array($aContext)) {
            $this->aContext = $aContext;
        }
    }

    /**
     * Fetches the raw feed.
     *
     * @return string Feed body, or '' when the fetch failed.
     */
    private function getContent() {
        $cxContext = stream_context_create($this->aContext);
        $content = file_get_contents($this->url, false, $cxContext);
        return $content === false ? '' : $content;
    }

    /**
     * Parses the feed and collects the link of every channel item except the
     * first one.
     *
     * NOTE(review): the first item is skipped by position. Iterating a
     * SimpleXMLElement yields tag names as keys, so a `$key === 0` test can
     * never match; a counter is used instead. Confirm that dropping the first
     * item is really wanted.
     *
     * @return array List of link URLs; empty when the feed is unavailable or
     *               cannot be parsed.
     */
    private function resolveXMLtoList() {
        $list = array();

        $raw = trim($this->getContent());
        if ($raw === '') {
            return $list;
        }

        // Collect parser errors internally instead of emitting warnings for a
        // malformed feed, then restore the caller's libxml setting.
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($raw);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($xml === false || !isset($xml->channel->item)) {
            return $list;
        }

        $position = 0;
        foreach ($xml->channel->item as $item) {
            if ($position++ === 0) {
                continue;
            }
            $list[] = (string) $item->link;
        }
        return $list;
    }

    /**
     * Returns the item links of the feed.
     *
     * @return array
     */
    public function getList() {
        return $this->resolveXMLtoList();
    }
}

awesome.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
include_once('list.php');
include_once('detect.php');

// Entry point: build the feed reader (currently unused) and download every
// image of one hard-coded thread page into ../img.
$threadUrl = "http://wo.yao.cl/htm_data/16/1411/1274506.html";
$feedUrl = "http://t66y.com/rss.php?fid=16";

$feed = new IList($feedUrl);
// $list = $feed->getList();

$scraper = new IImage($threadUrl);
$scraper->saveImages('../img');

/**
 * Moves the image files lying directly inside $dir into $dir/noName.
 *
 * Only regular files whose extension is exactly jpg, png or gif (any case)
 * are moved; sub-directories and other files are left alone. Progress is
 * echoed to standard output.
 *
 * @param string $dir Directory to tidy up.
 * @throws UnexpectedValueException When $dir cannot be opened (raised by
 *                                  DirectoryIterator).
 */
function moveNaked($dir='img') {
    echo "Moving...\n\r";

    // Anchored so that extensions merely containing "jpg"/"png"/"gif" do not match.
    $imagePattern = "/^(jpg|png|gif)$/i";
    $target = $dir."/noName";

    // Collect the names first: entries are not renamed while the directory
    // handle is still being read.
    $pending = array();
    foreach (new DirectoryIterator($dir) as $fileinfo) {
        if ($fileinfo->isFile() && preg_match($imagePattern, $fileinfo->getExtension())) {
            $pending[] = $fileinfo->getFilename();
        }
    }

    // The target must exist before renaming into it; re-check is_dir() after
    // a failed mkdir() in case the directory appeared in the meantime.
    if ($pending && !is_dir($target) && !mkdir($target) && !is_dir($target)) {
        echo "Failed to create directory ".$target."\n\r";
        return;
    }

    foreach ($pending as $filename) {
        if (rename($dir."/".$filename, $target."/".$filename)) {
            echo $filename." has moved to ".$dir."/noName/\n\r";
        }
    }
    echo "Move completed.\n\r";
}