新浪微博数据挖掘食谱之九: 用户篇 (获取转发微博的用户名)

时间:2022-04-02 09:33:30
#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2015-1-5
@author: beyondzhou
@name: find_repost_users.py
'''

# Find the users who reposted a weibo
def find_repost_users():
    """Print one reposted 'iphone' weibo and the users who reposted it.

    Steps performed:

    1. Log in through ``weibo_login`` and search the topic ``'iphone'``.
    2. Pick the first search result whose repost count is greater than 0.
    3. Fetch page 1 of that weibo's repost timeline (``count=200``).
    4. Print the primary weibo as indented JSON, then the screen names of
       the reposting users on a single space-separated line.

    If no search result has any reposts (including an empty search result),
    a notice is printed and the function returns early instead of failing
    on an unassigned weibo id.

    Returns:
        None. All results are written to standard output.
    """
    # Project-local helpers are imported at call time, inside the function.
    from search import weibo_search
    from entities import weibo_entities
    from login import weibo_login
    from statuses import fetch_repost_timeline, fetch_weibo_status
    import json

    # Access to sina api
    weibo_api = weibo_login()

    # Do the search
    subject = weibo_search(topic='iphone')

    # Decode entities; only the ids and repost counts are used below.
    (mids, names, texts, dates, reposts, comments, likes) = weibo_entities(subject)

    # Pick the first weibo whose repost count is greater than 0.
    for weibo_id_reposted, repost_count in zip(mids, reposts):
        if repost_count > 0:
            print('reposts number: %s, weibo_id_reposted: %s'
                  % (repost_count, weibo_id_reposted))
            break
    else:
        # The loop ended without a match, so there is nothing to look up.
        print('No reposted weibo found!\n')
        return

    # Find repost timeline
    repost_timeline = fetch_repost_timeline(
        weibo_api, count=200, page=1, weibo_id=weibo_id_reposted)

    # Find repost users from repost timeline
    repost_users = [status['user']['screen_name'] for status in repost_timeline]

    # Output primary weibo
    primary_weibo = fetch_weibo_status(weibo_api, weibo_id=weibo_id_reposted)
    print(json.dumps(primary_weibo, indent=1))
    print('Output primary weibo done!\n')

    # Output repost users: names separated by single spaces on one line,
    # immediately followed by the completion marker.  A single-argument
    # print(...) call is valid on both Python 2 and Python 3.
    print(' '.join(repost_users + ['Output repost users done!\n']))
        
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    find_repost_users()

# Get the repost timeline of a weibo
def fetch_repost_timeline(weibo_api, count=200, page=1, weibo_id=1):
    """Fetch one page of the repost timeline for a weibo.

    Calls ``weibo_api.statuses.repost_timeline.get`` with ``count``,
    ``page`` and ``id=weibo_id`` and returns the value stored under the
    ``'reposts'`` key of the response.

    Args:
        weibo_api: API client exposing ``statuses.repost_timeline.get``.
        count: Forwarded as the ``count`` request parameter.
        page: Forwarded as the ``page`` request parameter.
        weibo_id: Id of the weibo, forwarded as the ``id`` parameter.

    Returns:
        ``response['reposts']`` -- presumably a list of status dicts;
        verify against the API response format.
    """
    endpoint = weibo_api.statuses.repost_timeline
    response = endpoint.get(count=count, page=page, id=weibo_id)
    return response['reposts']

# Get weibo status
def fetch_weibo_status(weibo_api, weibo_id=1):
    """Fetch a single weibo status by id.

    Args:
        weibo_api: API client exposing ``statuses.show.get``.
        weibo_id: Id of the weibo, forwarded as the ``id`` parameter.

    Returns:
        Whatever ``weibo_api.statuses.show.get(id=weibo_id)`` returns,
        unmodified.
    """
    return weibo_api.statuses.show.get(id=weibo_id)

Result:

callback_url: https://api.weibo.com/oauth2/authorize?redirect_uri=http%3A//apps.weibo.com/guaguastd&response_type=code&client_id=2925245021
return_redirect_uri: http://weibo.com/login.php?url=http%3A%2F%2Fapps.weibo.com%2Fguaguastd%3Fcode%3D9a0aee0faa54175fb424c6241c2fcf57
code: ['9a0aee0faa54175fb424c6241c2fcf57']
now_handle: e03dd8e0-9458-11e4-bd7a-51164c1d28c3
http://passport.weibo.com/
all_handles: [u'e03dd8e0-9458-11e4-bd7a-51164c1d28c3', u'e4c29db0-9458-11e4-bd7a-51164c1d28c3']
i= 0
search done!
mids entities done!
names entities done!
texts entities done!
dates entities done!
reposts entities done!
comments entities done!
likes entities done!
reposts number: 6055, weibo_id_reposted: 3795252660296828
{
 "reposts_count": 6055, 
 "truncated": false, 
 "text": "iPhone6\u7528\u6237\u90fd\u4e0d\u5728\u610f1G\u5185\u5b58\u4e0d\u591f\uff0c\u7ea2\u7c732\u76841G\u5185\u5b58\u591f\u7528\u4e86\uff0c\u4fdd\u6301\u6d41\u7545\u7684\u79d8\u8bc0\u5c31\u662f\u5c11\u88c5app\uff0c\u5c24\u5176\u662f\u4e0d\u8981\u88c5xx\u536b\u58eb\uff0cxx\u6d4f\u89c8\u5668\u8fd9\u6837\u5e38\u9a7b\u5185\u5b58\u6216\u8005\u7279\u522b\u8017\u5185\u5b58\u7684\u3002", 
 "visible": {
  "type": 0, 
  "list_id": 0
 }, 
 "in_reply_to_status_id": "", 
 "id": 3795252660296828, 
 "mid": "3795252660296828", 
 "source": "<a href=\"http://app.weibo.com/t/feed/c66T5g\" rel=\"nofollow\">Android\u5ba2\u6237\u7aef</a>", 
 "attitudes_count": 542, 
 "in_reply_to_screen_name": "", 
 "pic_urls": [], 
 "annotations": [
  {
   "client_mblogid": "fb0b9486-c9cb-4e82-a646-ad39f2b80b85", 
   "shooting": 1
  }
 ], 
 "in_reply_to_user_id": "", 
 "darwin_tags": [], 
 "favorited": false, 
 "idstr": "3795252660296828", 
 "source_type": 1, 
 "user": {
  "bi_followers_count": 308, 
  "domain": "qq993663", 
  "avatar_large": "http://tp2.sinaimg.cn/1782068057/180/5681010393/1", 
  "verified_source": "", 
  "ptype": 0, 
  "statuses_count": 6679, 
  "id": 1782068057, 
  "verified_reason_url": "", 
  "city": "1000", 
  "verified": true, 
  "friends_count": 372, 
  "verified_reason_modified": "", 
  "credit_score": 80, 
  "block_app": 0, 
  "follow_me": false, 
  "verified_reason": "\u5c0f\u7c73\u521d\u521b\u5458\u5de5\uff0cMIUI\u5f00\u53d1\u5de5\u7a0b\u5e08\uff0c\u5c0f\u7c73\u751f\u6001\u94fe\u4ea7\u54c1\u89c4\u5212\u603b\u76d1", 
  "followers_count": 107627, 
  "location": "\u5317\u4eac", 
  "verified_state": 0, 
  "verified_trade": "1189", 
  "mbtype": 0, 
  "verified_source_url": "", 
  "profile_url": "qq993663", 
  "block_word": 0, 
  "avatar_hd": "http://ww1.sinaimg.cn/crop.90.67.242.242.1024/6a383359jw1eb6dntk3dcj21020r2q7y.jpg", 
  "star": 0, 
  "description": "MIUI\u5f00\u53d1\u5de5\u7a0b\u5e08", 
  "verified_contact_email": "", 
  "online_status": 0, 
  "mbrank": 0, 
  "verified_level": 2, 
  "profile_image_url": "http://tp2.sinaimg.cn/1782068057/50/5681010393/1", 
  "idstr": "1782068057", 
  "verified_contact_mobile": "", 
  "allow_all_act_msg": false, 
  "allow_all_comment": false, 
  "geo_enabled": true, 
  "class": 1, 
  "screen_name": "\u5b59\u9e4f_\u5c0f\u7c73", 
  "lang": "zh-cn", 
  "weihao": "", 
  "remark": "", 
  "favourites_count": 164, 
  "name": "\u5b59\u9e4f_\u5c0f\u7c73", 
  "url": "", 
  "province": "11", 
  "created_at": "Fri Jul 23 00:31:27 +0800 2010", 
  "verified_contact_name": "", 
  "verified_type": 0, 
  "gender": "m", 
  "following": false, 
  "pagefriends_count": 0, 
  "urank": 23
 }, 
 "geo": null, 
 "created_at": "Sun Jan 04 11:30:09 +0800 2015", 
 "mlevel": 0, 
 "comments_count": 5623
}
Output primary weibo done!

旅行并不孤单 Courteous 他的小江湖 草亲木 麦一川 暴走的秋裤侠 b默默地聆听 丶__________丶 Gaming-_ 冷绎 一缕绯缚 tpang 哗哗落花花_不溶于水 黑染-因为奶盖*改为宫染 Tayutai_冰龍 saber思密达 涩_Se 空空炭丧病地说 Alfieeeeeeee Chaos忘忧草 就像神话 二丶貓 神官姐姐_不立派果推 稻穗下的四叶猫X 黑暗圣殿骑士 诚实一孩纸 請叫我亂七八糟先生 YOYO酱_影日不足 poker--sir 鹿野囚人_1224是konoha生日快乐 关官广贯管管不是光光也不是关关 满怀信仰的吃货 gaoyan189 Remember_noHOPE Yuki_沐942800899 EagleK730 A_TinySpot Nicolascxd 阮寒阳要好好学习 紧制小菊 啊荆柯瑜 师怀海 我是小小小小小小小超 flame电波 斗师哥 刘吴嘉锋 王倩cjppe 副oo 鱼根猫 绥弦 陈小弦大神 skyseanet 超超說_ 不疼尼酱 海楼石恶魔果实 神道主义救援者 千年huafen 低调的淫叫 -Kevin_W 北方冰泣 福二代么 美优优 威海徐红卫 天蝎Cc座 马小羊mmmmmm acforfun 晨曦同学03 水天殇 南宫柳溪 幸福晚点名1314 Tempay 锦素-梓天 完美丿痞子 人一中要 窮凶極惡謝老狗 liu浪的人2013 桂木顺九 虹口铁杆散客 ZH_Ray mandfx 枫荻秋瑟瑟 白马倚斜桥 南山忆古今 流举 王者长城H1 常州魅族专卖店 Sn0wView IT坟墓 菌临天下一蘑菇 C崔玉婷C 贵圈真X 天知道我的故事 王维我错了 小姐给我来个杯子 用户名不存在鸟 是在和 微波炉Galazb 飞翔的亲亲 路狄凡耶夫 康师傅这名字都有人用 熊猫他急了 纱布1 正正正正丶 武汉手机圈小贝 -诙谐佬 王者伟方 反方向的大笨钟 阿婷_justCoding twilight菡 好像什么困境都知道怎么办 Jarek_Len 屑风的八酒杯 钱亘敦 柒块钱 小小丶帅 菜鸟bang 狂少六子 猴大铭 張雲煬 hubj627 不能吃肉的狼不是好狗 遗忘的书剑子 害怕离去的背影 Ever_半颗牙 LeoCatcher 不见的素颜 Kimi_TT gdmzboy 一真见仁 dianfanbao EdwArd_蔡楚華 J知道K不知道 hHoweG 花瓶大同学 gdmzboy Ever_半颗牙 无敌牛宝宝 乌拉拉_2_ 周磊LZhou 突然又想起了你 电商KK 雪方朔夜_欧派欧派欧 有远见的竖子萧 徐公全 别让昨天的雨淋湿今天的衣 牵牛_牛 官小辉X 成熟俏妞 小钻风114 涛子涛 盲栈无动态 岑小煜 R_t_x_ 含蓄缄默 尛皛xTeRRy 尤建彬 Android-手机论坛 梅花盗的孙子 弘卡 风君子Real 逝夏2627 赵汉霖 ZM只20你 陈日伟fj555 给我一盒菊花茶 -LUCKY庆 阿莫丷 命中缺钱五行缺金 張小聰naka 尹恒武 TMS-刘 MartinLau_ Li哲翰 微笑大叔罒 o晓凯儿o 夜晚漫步在布鲁克林的大猫 apomnpdu虐无数 兰花晓月晴天 你瞅啥呀你瞅 独孤素雅 一直很安静moto 寒_寒 凌雪千泷 流氓_杜 庄嘉豪是英俊潇洒的文科男 伈怡刷帖号 Jean-Paul-Sartre 反方向的大笨钟 呵该怎么说 流氓_-_ 暗恋_小屁孩 向量工作室 手机半残废玩着更崩溃 哥特式堕落v 碎羽586 柴木斌 残月咏叹 我的那些誓言 恨_铁 _xxp_ Output repost users done!