Linux C程序操作Mysql 调用PHP采集淘宝商品

时间:2023-03-09 17:49:58
Linux C程序操作Mysql 调用PHP采集淘宝商品

还是继续这个项目。

在上一篇Linux下利用Shell使PHP并发采集淘宝产品中,采用shell将对PHP的调用推到后台执行,模拟多线程。

此方法有一个致命缺点:只能人工预估每个程序的执行时间。如果预估时间少于实际执行时间,就会堆积大量进程;如果预估时间多于实际执行时间,则会白白浪费时间。

所以,在此我们采用C程序来控制并发数。

整体思路和用shell调用相似,只是把shell控制改成了C。

下面是C程序:

 #include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include "/usr/local/include/mysql/mysql.h"
#define MAX_COLUMN_LEN 32
#define THREAD_NUM 20//线程数
int threads = ;
pthread_t thread[THREAD_NUM];
pthread_mutex_t mut;//线程锁
int count=,vod_count=,number = ;
int *goods_id[];
void *thread1(int thread_id)
{
int sleepsec;
while (number < count){;
char shell_cmd[];
printf("number:%d\tthread_id=%d\tid=%s\n", number, thread_id, goods_id[number]);
sprintf(shell_cmd, "/usr/local/bin/php /var/www/9384shop/cron/goodsupdate.php %s", goods_id[number]);//生成shell命令
system(shell_cmd);//调用shell
pthread_mutex_lock(&mut);
number++;
pthread_mutex_unlock(&mut);
}
pthread_exit(NULL);
} void create_thread(void){
int i,temp;
for (i = ; i < THREAD_NUM; i++){
if (thread[i] == ){
if ((temp = pthread_create(&thread[i], NULL, thread1, i)) != ){
}
else{
threads++;
}
break;
}
}
sleep();
}
void thread_wait(void)
{
int i;
/*等待线程结束*/
for (i = ; i < THREAD_NUM; i++){
if (thread[i] != ) {
pthread_join(thread[i], NULL);
}
}
}
int main(int argc, char *argv[]){
MYSQL my_connection;
MYSQL_RES *result;
MYSQL_ROW sql_row;
MYSQL_FIELD *fd;
char column[MAX_COLUMN_LEN][MAX_COLUMN_LEN];
int res,flag;
mysql_init(&my_connection);
if (mysql_real_connect(&my_connection, "localhost"
, "root", "202.133", "shop", , NULL, )){
printf("connected to mysql.\n");
res = mysql_query(&my_connection, "select id from s_goods where is_off_sale=0 order by id desc limit 1000000");//查询
printf("select id from s_goods where is_off_sale=0 order by id desc limit 1000000\n");
if (!res){
int i = , j;
result = mysql_store_result(&my_connection);//保存查询到的数据到result
printf("the result number is %lu\n", (unsigned long)mysql_num_rows(result));
count = (unsigned long)mysql_num_rows(result);
while (sql_row = mysql_fetch_row(result))//获取具体的数据
{
goods_id[i] = (unsigned long)sql_row[];
i++;
}
}
mysql_close(&my_connection);//断开连接
while (threads < THREAD_NUM)
create_thread();
thread_wait();
}
else{
mysql_close(&my_connection);//断开连接
printf("ERROR:can not connect to mysql\n");
} }

PHP:

 <?php
/**
 * Refreshes one row of s_goods from its product page.
 *
 * Usage: php goodsupdate.php <goods id>
 *
 * Reads the product URL and stored price, scrapes the page with getPrice(),
 * writes the scraped fields back (or marks the product as off sale) and
 * appends one CSV line describing the run to the log file.
 */
define("OTHER",true);
$host='localhost';
$username='root';
$password='123456';
$db_name='taobao';
$s=microtime(1);

// The id is concatenated into SQL below, so accept decimal digits only.
if(!isset($argv[1])||!preg_match('/^\d+$/',$argv[1])){
    fwrite(STDERR,"usage: php goodsupdate.php <numeric goods id>\n");
    exit(1);
}
$id=$argv[1];

// The connection is opened and closed around each statement so that it is
// not held while the product page is being fetched.
$con=mysql_connect($host,$username,$password);
mysql_select_db($db_name, $con);
$res=mysql_query('SELECT url,price FROM s_goods where id='.$id);
$row=$res?mysql_fetch_array($res,MYSQL_ASSOC):false;
mysql_close($con);
if(!is_array($row)){
    fwrite(STDERR,"goods id $id not found\n");
    exit(1);
}

$oldprice=$row['price'];
$rs=getPrice($row['url']);
$t=microtime(1)-$s;

// Log record: timestamp, id, elapsed seconds, then a status or the fields.
$r=array();
$r[]=date('Y-m-d H:i:s');
$r[]=$id;
$r[]=ceil($t*1000)/1000;
if($rs==='soldout'){
    $r[]="OutStock";
    $con=mysql_connect($host,$username,$password);
    mysql_select_db($db_name, $con);
    mysql_query("UPDATE s_goods SET is_off_sale=1 WHERE id=".$id);
    mysql_close($con);
}
elseif($rs===false) $r[]= 'FALSE';
else{
    $r[]=$oldprice;
    $r[]=isset($rs['price'])?$rs['price']:'';
    $r[]=isset($rs['seller_nick'])?$rs['seller_nick']:'';
    $r[]=isset($rs['taobao_shop_id'])?$rs['taobao_shop_id']:'';
    $r[]=isset($rs['shop_name'])?$rs['shop_name']:'';
    $r[]=isset($rs['sales'])?$rs['sales']:'';
    $r[]=isset($rs['taobao_cid'])?$rs['taobao_cid']:'';
    $r[]=isset($rs['merchandis_score'])?$rs['merchandis_score']:'';
    $r[]=isset($rs['merchandis_total'])?$rs['merchandis_total']:'';

    $con=mysql_connect($host,$username,$password);
    mysql_select_db($db_name, $con);
    mysql_query("set names utf8");
    $a=array();
    //$rs['is_off_sale']=0;
    foreach ($rs as $k=>$v){
        // Empty values are skipped so they do not overwrite stored data.
        if(!empty($v)){
            // Values come from a scraped web page: escape them before they
            // are placed inside the statement.
            $a[]="$k='".mysql_real_escape_string((string)$v,$con)."'";
        }
    }
    $a[]="update_time='".date('Y-m-d H:i:s')."'";
    mysql_query("UPDATE s_goods SET ".implode(',',$a)." WHERE id=".$id);
    mysql_close($con);
}
$h=fopen('/home/staff/www/9384shop/cron/goodsUpdate.log','a+');
fputcsv($h,$r);
fclose($h);

/**
 * Scrapes a Taobao/Tmall product page.
 *
 * @param string $url Product page URL; the item id is taken from its
 *                    "id=" query parameter.
 * @return array|string The string 'soldout' when the page says the item is
 *                      gone; otherwise an array that always has 'price'
 *                      (false when no price was found) and, when found,
 *                      'seller_nick', 'taobao_shop_id', 'taobao_cid',
 *                      'shop_name', 'sales', 'merchandis_score' and
 *                      'merchandis_total'.
 *
 * Terminates the script when the page cannot be fetched or is empty.
 */
function getPrice($url){
    $rs=array();
    $price=false;
    // Defaults keep the follow-up URLs well-formed when a pattern misses.
    $id=preg_match('/[&|\?]id=(\d+)/',$url,$m)?$m[1]:'';
    $c=curls($url,true);
    $content=is_array($c)?$c['content']:'';
    if(empty($content)) exit;
    $content=mb_convert_encoding($content,"UTF-8","gbk");
    $lastredirectaddr = $c['lastredirectaddr'];

    if(preg_match('/noitem\.htm/',$content)||preg_match('/<strong>此宝贝已下架<\/strong>|您查看的商品找不到了|您查看的宝贝不存在,可能已下架或者被转移/',$content)){
        return 'soldout';
    }elseif(preg_match("/'reservePrice'\s*:\s*'([\d\.]+?)',/",$content,$m)){
        $price = (float)$m[1];
    }elseif(preg_match('/price:([\d\.]+?),/',$content,$m)){
        $price = (float)$m[1];
    }

    if(preg_match('/"sellerNickName"\s*:\s*"(.*?)",/',$content,$nick)){
        $rs['seller_nick'] = urldecode($nick[1]);
    }elseif(preg_match('/sellerNick\s*:\s*"(.*?)",/',$content,$nick)){
        $rs['seller_nick'] = $nick[1];
    }

    if(preg_match('/shopId:"(\d+?)",/',$content,$shopid)){
        $rs['taobao_shop_id']=$shopid[1];
    }elseif(preg_match('/&shopId=(\d+)&/',$content,$shopid)){
        $rs['taobao_shop_id']=$shopid[1];
    }

    if(preg_match("/'categoryId'\s*:\s*'(\d+?)',/",$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }elseif(preg_match('/"categoryId"\s*:\s*"(\d+?)",/',$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }elseif(preg_match("/\scid:'(\d+?)',/",$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }

    if(OTHER){
        if(preg_match('/tmall\.com/',$lastredirectaddr)){
            // Tmall page layout.
            if(preg_match('/slogo-shopname.*?>(.*?)<\/a>/',$content,$shopname)){
                // json_decode turns \uXXXX escapes into characters.
                $rs['shop_name']=json_decode('"'.$shopname[1].'"');
            }
            if(empty($rs['shop_name'])&&!empty($shopname[1])) $rs['shop_name']=$shopname[1];
            if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];

            $url2='http://mdskip.taobao.com/core/initItemDetail.htm?itemId='.$id;
            $tmall_info = curls($url2);
            if(preg_match('/"sellCount"\s*:\s*(\d+)/',(string)$tmall_info,$temp)){
                $rs['sales']=$temp[1];
            }

            $merchandis=(string)curls("http://dsr.rate.tmall.com/list_dsr_info.htm?callback=a&itemId=".$id);
            if(preg_match('/gradeAvg"\s*:\s*([0-9\.]+)/',$merchandis,$m_t))
                $rs['merchandis_score']=$m_t[1];
            if(preg_match('/rateTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t2))
                $rs['merchandis_total']=$m_t2[1];
        }else{
            // Taobao page layout.
            if(preg_match('/shopName\s*:\s*"(.*?)",/',$content,$shopname)){
                $rs['shop_name']=json_decode('"'.$shopname[1].'"');
            }
            if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];

            // preg_match leaves an empty array in its output argument on a
            // miss, so the results are copied into plain strings.
            $sellerid='';
            if(preg_match('/sellerId\s*:\s*"(.*?)"/',$content,$m)||preg_match('/userId\':\'(\d+)\'/',$content,$m)){
                $sellerid = $m[1];
            }
            $sbn='';
            if(preg_match('/sbn=([0-9a-z]+)/',$content,$m))
                $sbn=$m[1];

            $url2='http://detailskip.taobao.com/json/ifq.htm?id='.$id.'&sid='.$sellerid.'&sbn='.$sbn.'&q=1&callback=a';
            $count_rs = curls($url2);
            if(preg_match('/quanity\s*:\s*(\d+)/',(string)$count_rs,$temp)){
                $rs['sales']=$temp[1];
            }

            $merchandis=(string)curls("http://rate.taobao.com/detail_rate.htm?userNumId=$sellerid&auctionNumId=$id&currentPage=1&rateType=1");
            if(preg_match('/merchandisScore"\s*:\s*"([0-9\.]+)/',$merchandis,$m_t)) $rs['merchandis_score']=$m_t[1];
            else $rs['merchandis_score']=6;
            if(preg_match('/merchandisTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t)) $rs['merchandis_total']=$m_t[1];
            else $rs['merchandis_total']=0;
        }
    }

    if(!$price){
        // No price in the page itself: take the lowest positive price from
        // the item-detail JSON instead.
        if(!isset($tmall_info)){
            $url2="http://mdskip.taobao.com/core/initItemDetail.htm?itemId=".$id;
            $tmall_info=curls($url2);
        }
        $priceinfo=null;
        if(is_string($tmall_info)&&$tmall_info!==''){
            // Bare numeric keys are quoted so that the text parses as JSON.
            $price_content=json_decode(iconv('gbk','utf-8',preg_replace('/(\d{10,}):/','"${1}":',$tmall_info)),true);
            if(isset($price_content['defaultModel']['itemPriceResultDO']['priceInfo'])){
                $priceinfo=$price_content['defaultModel']['itemPriceResultDO']['priceInfo'];
            }
        }
        $candidates=array();
        if(is_array($priceinfo)){
            foreach ($priceinfo as $v){
                if(isset($v['price'])&&$v['price']>0)
                    $candidates[]=$v['price'];
                foreach (array('promotionList','suggestivePromotionList') as $listKey){
                    if(!isset($v[$listKey])||!is_array($v[$listKey])) continue;
                    foreach ($v[$listKey] as $v2){
                        if(!empty($v2['extraPromPrice'])) $p=$v2['extraPromPrice'];
                        else $p=isset($v2['price'])?$v2['price']:0;
                        if($p>0) $candidates[]=$p;
                    }
                }
            }
        }
        $price=count($candidates)>0?min($candidates):false;
    }
    $rs['price']=$price;
    // $rs always holds 'price' at this point, so it is never empty.
    return $rs;
}
/**
 * Fetches a URL with cURL, retrying when the body comes back empty.
 *
 * @param string $url              Address to fetch.
 * @param bool   $lastredirectaddr When true, return
 *                                 array('content'=>body,
 *                                 'lastredirectaddr'=>final URL after
 *                                 redirects) instead of the body string.
 * @param bool   $head             Passed to CURLOPT_HEADER.
 * @param int    $times            Attempt counter, used by the retry.
 * @return string|array|false Body (or array, see above); false when the
 *                            body is still empty on the third attempt.
 *
 * A transport error prints the cURL message and terminates the script.
 */
function curls($url,$lastredirectaddr=false,$head=false,$times=1){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0');
    curl_setopt($ch, CURLOPT_REFERER,'http://www.tmall.com/');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30); //timeout on connect
    curl_setopt($ch, CURLOPT_TIMEOUT, 30); //timeout on response
    curl_setopt($ch, CURLOPT_HEADER, $head); // include response headers when non-zero
    curl_setopt($ch, CURLOPT_MAXREDIRS, 50 );
    curl_setopt($ch, CURLOPT_URL, $url);
    $body = curl_exec($ch);
    if($body === false){
        echo 'Curl error: ' . curl_error($ch)."\n";
        exit;
    }
    $effective_url = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL);
    curl_close($ch);

    // Test the raw body, before it is wrapped in an array: an array never
    // compares equal to '', which would hide an empty response.
    if($body!==''){
        if($lastredirectaddr) return array('content'=>$body,'lastredirectaddr'=>$effective_url);
        return $body;
    }
    if($times<3) return curls($url,$lastredirectaddr,$head,$times+1);
    return false;
}

程序执行结果:

 "2014-04-28 12:55:17",,0.967,200.00,200.00,力挺服饰专营店,,力挺服饰专营店,,,0.0,
"2014-04-28 12:55:17",,1.018,250.00,250.00,力挺服饰专营店,,力挺服饰专营店,,,5.0,
"2014-04-28 12:55:17",,1.001,189.00,189.00,兴铭服饰专营店,,兴铭服饰专营店,,,0.0,
"2014-04-28 12:55:17",,0.979,500.00,500.00,力挺服饰专营店,,力挺服饰专营店,,,5.0,
"2014-04-28 12:55:17",,0.982,150.00,150.00,力挺服饰专营店,,力挺服饰专营店,,,5.0,
"2014-04-28 12:55:17",,0.874,138.00,,美品坊,,精致女装美品坊,,,,
"2014-04-28 12:55:17",,1.008,229.00,229.00,兴铭服饰专营店,,兴铭服饰专营店,,,0.0,
"2014-04-28 12:55:17",,0.962,259.00,259.00,爱购叁陆陆服饰专营店,,爱购叁陆陆服饰专营店,,,0.0,
"2014-04-28 12:55:17",,1.017,273.42,273.42,力挺服饰专营店,,力挺服饰专营店,,,0.0,
"2014-04-28 12:55:17",,0.961,646.80,646.80,羽戈旗舰店,,羽戈旗舰店,,,4.7,
"2014-04-28 12:55:17",,1.011,239.00,239.00,兴铭服饰专营店,,兴铭服饰专营店,,,0.0,
"2014-04-28 12:55:17",,1.009,235.12,235.12,恋尚妮家纺旗舰店,,恋尚妮家纺旗舰店,,,4.5,
"2014-04-28 12:55:17",,0.968,320.68,320.68,恋尚妮家纺旗舰店,,恋尚妮家纺旗舰店,,,4.8,
"2014-04-28 12:55:17",,0.946,19.50,19.50,淘公馆数码专营店,,淘公馆数码专营店,,,4.6,
"2014-04-28 12:55:17",,0.985,482.92,482.92,恋尚妮家纺旗舰店,,恋尚妮家纺旗舰店,,,4.8,
"2014-04-28 12:55:17",,0.968,125.00,128.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.9,
"2014-04-28 12:55:17",,0.988,99.00,99.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.8,
"2014-04-28 12:55:17",,0.976,135.00,148.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.7,
"2014-04-28 12:55:18",,0.964,242.00,245.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.7,
"2014-04-28 12:55:18",,0.953,412.70,427.50,忆红妆旗舰店,,忆红妆旗舰店,,,4.7,
"2014-04-28 12:55:18",,0.971,363.00,365.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.973,179.10,175.00,忆红妆旗舰店,,忆红妆旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.981,334.65,331.00,妹魅旗舰店,,妹魅旗舰店,,,4.7,
"2014-04-28 12:55:18",,0.943,315.00,315.00,gotrip箱包旗舰店,,gotrip箱包旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.989,192.00,192.00,哈妃猫旗舰店,,哈妃猫旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.965,426.00,426.00,chicsouls旗舰店,,chicsouls旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.953,99.00,99.00,莉娅阁旗舰店,,莉娅阁旗舰店,,,4.8,
"2014-04-28 12:55:18",,2.126,158.00,,天天都特价等你,,天天都特价,,,,
"2014-04-28 12:55:18",,0.973,2999.00,2999.00,舒适堡鞋类旗舰店,,舒适堡鞋类旗舰店,,,5.0,
"2014-04-28 12:55:18",,0.98,589.00,598.00,舒适堡鞋类旗舰店,,舒适堡鞋类旗舰店,,,5.0,
"2014-04-28 12:55:18",,0.972,253.00,253.00,非你不嫁服饰旗舰店,,非你不嫁服饰旗舰店,,,5.0,
"2014-04-28 12:55:18",,0.854,198.00,,刀1984,,LFMY,,,,
"2014-04-28 12:55:18",,0.965,235.00,235.00,千禧新娘旗舰店,,千禧新娘旗舰店,,,4.8,
"2014-04-28 12:55:18",,0.98,10.00,10.00,朵品旗舰店,,朵品旗舰店,,,4.9,
"2014-04-28 12:55:18",,0.973,619.74,187.80,珂尼娅旗舰店,,珂尼娅旗舰店,,,5.0,
"2014-04-28 12:55:18",,0.977,138.00,138.00,eyesonu服饰旗舰店,,eyesonu服饰旗舰店,,,4.7,
"2014-04-28 12:55:19",,0.97,178.00,178.00,shezgood旗舰店,,shezgood旗舰店,,,5.0,
"2014-04-28 12:55:19",,0.992,119.00,119.00,伊莲旗舰店,,伊莲旗舰店,,,4.7,
"2014-04-28 12:55:19",,0.967,219.80,219.80,爱伴箱包旗舰店,,爱伴箱包旗舰店,,,4.7,
"2014-04-28 12:55:19",,0.948,86.00,84.71,姿态服饰专营店,,姿态服饰专营店,,,3.6,
"2014-04-28 12:55:19",,1.082,99.00,98.90,奈奈爱霓女装旗舰店,,奈奈爱霓女装旗舰店,,,4.8,
"2014-04-28 12:55:19",,0.995,50.00,50.00,牧缇旗舰店,,牧缇旗舰店,,,4.8,
"2014-04-28 12:55:19",,0.998,98.01,98.01,lishberry旗舰店,,lishberry旗舰店,,,4.8,
"2014-04-28 12:55:19",,0.991,498.00,498.00,uncontrollable旗舰店,,uncontrollable旗舰店,,,4.5,
"2014-04-28 12:55:19",,0.981,99.00,99.00,森露旗舰店,,森露旗舰店,,,4.7,
"2014-04-28 12:55:19",,0.968,49.00,49.00,桃苡服饰旗舰店,,桃苡服饰旗舰店,,,5.0,
"2014-04-28 12:55:19",,0.954,360.64,360.64,深艺服饰旗舰店,,深艺服饰旗舰店,,,0.0,
"2014-04-28 12:55:19",,0.955,168.00,168.00,艾芭莉旗舰店,,艾芭莉旗舰店,,,4.8,
"2014-04-28 12:55:19",,0.962,78.00,78.00,歌莉韵旗舰店,,歌莉韵旗舰店,,,4.8,
"2014-04-28 12:55:19",,0.943,64.00,64.00,ieemk旗舰店,,ieemk旗舰店,,,4.7,

从日志中我们可以看出,1秒钟更新大概是15-20个产品。

采用这种方式既可以控制线程数,又能并发,或许是一个很好的解决方案。

但此方法也有自身的缺点:

  1.因为主要功能是通过PHP来实现的,所以每更新一个产品,操作系统都必须新创建一个进程,这大大增加了操作系统的开销。如果直接在C中实现PHP的主要功能,程序性能会大大提高。

  2.功能耦合性太强,即使只改一个小细节(比如并发数、查询SQL等),也只能修改源码后重新编译,应改为通过参数传入来弥补这个缺点。

  3.因为C只给PHP传递了1个ID参数,PHP必须通过查询数据库来获得其它信息,这样就会增加数据库的压力,降低程序的效率。

因为我是初学C,现学现卖,水平有限,所以留待以后改进。