-->
当前位置:首页 > ELSE > 正文内容

MSDN磁力链爬虫

Luz · 4年前 (2021-11-02) · ELSE · 5213
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import re
  4. import json
  5. import requests
  6.  
  7. API_MSDN_INDEX = 'https://msdn.itellyou.cn/'
  8.  
  9. API_INDEX = 'https://msdn.itellyou.cn/Index/GetCategory'
  10.  
  11. API_GET_LANG = 'https://msdn.itellyou.cn/Index/GetLang'
  12.  
  13. API_GET_LIST = 'https://msdn.itellyou.cn/Index/GetList'
  14.  
  15. API_GET_PRODUCT = 'https://msdn.itellyou.cn/Index/GetProduct'
  16.  
  17. headers = {
  18. 'Referer':'https://msdn.itellyou.cn/',
  19. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
  20. 'x-csrf-token':'CfDJ8Pkxne5b6ctAqJ7E_I5QAwlj_jfdNMW73lm7EXmK-wDUYnSG5jjFP_h5Wu-c9K-fzc0Ak5mgWc2sZlxXD0pvLako3f1eZ4l93SGD8djxN578BF7z7RUY1JMHihxkeYSmU6SLTlpilXTANrU2GCmagbE',
  21. 'cookie': 'UM_distinctid=17b96ee341b1ca-07d12fbf3a44a8-c343365-1fa400-17b96ee341c473; _ga=GA1.2.869490056.1630324799; .AspNetCore.Antiforgery.kC_Kc8he0KM=CfDJ8Pkxne5b6ctAqJ7E_I5QAwmrDvKmuf5nZKhFFl4mStgoRtDUvVyMMP41rRppdfK2HGaS4Pb5quYbQVc35oV2YR0oHszJ2cZre9tSd0pPSOJSk0QqE_qf6ENkEBRtBGRu4IWPGLrfIUZFOPAhHcMXP78; _gid=GA1.2.41224256.1632673751; Hm_lvt_8688ca4bc18cbc647c9c68fdaef6bc24=1630324799,1632673751; CNZZDATA1605814=cnzz_eid%3D294278726-1630317960-https%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1632708437; Hm_lpvt_8688ca4bc18cbc647c9c68fdaef6bc24=1632718285'
  22. }
  23.  
  24. RESULT = {'data':[]} 
  25. def get_product(id):
  26.     r = requests.post(API_GET_PRODUCT, headers=headers, data={'id':id})
  27.     #print(r.text)
  28.     if r.status_code == requests.codes.ok:
  29.             item = r.json().get('result')
  30.             print('文件名:%s' % item.get('filename'))
  31.             #print 'PostData:%s' % item.get('PostDateString')
  32.             print('SHA1校验值:%s' % item.get('sha1'))
  33.             print('文件大小:%s' % item.get('size'))
  34.             print('下载链接:%s' % item.get('download'))
  35.             return item
  36.  
  37.  
  38. def get_list(id, lang_id):
  39.     r = requests.post(API_GET_LIST, headers=headers, data={'id':id, 'lang':lang_id, 'filter':'true'})
  40.     if r.status_code == requests.codes.ok:
  41.         product_list = []
  42.         for item in r.json().get('result'):
  43.             product_info = get_product(item.get('id'))
  44.             product_list.append(product_info)
  45.         return product_list
  46.  
  47. def get_lang(id):
  48.     r = requests.post(API_GET_LANG, headers=headers, data={'id':id})
  49.     if r.status_code == requests.codes.ok:
  50.             lang_list = []
  51.             for lang in r.json().get('result'):
  52.                 print("语言:",lang.get('lang'))
  53.                 info = {'lang':lang.get('lang'), 'product_list':get_list(id,lang.get('id'))}
  54.                 lang_list.append(info)
  55.             return lang_list
  56.  
  57.  
  58. def get_download_list(category_id):
  59.     r = requests.post(API_INDEX, headers=headers, data={'id':category_id})
  60.     #print(r.text)
  61.     if r.status_code == requests.codes.ok:
  62.             for item in r.json():
  63.                 #print('System Name: %s'% item.get('name'))
  64.                 system_info = get_lang(item.get('id'))
  65.                 system_info = {'name':item.get('name'), 'lang_list':system_info}
  66.                 RESULT['data'].append(system_info)
  67.             #print('finishied!!!')
  68.             return RESULT
  69.  
  70.  
  71. if __name__ == '__main__':
  72.     json_buffer = get_download_list('7AB5F0CB-7607-4BBE-9E88-50716DC43DE6')
  73.     #print(json_buffer)
  74.     #json.dump(json_buffer,f)

发表评论

访客

◎欢迎参与讨论,请在这里发表您的看法和观点。