本文最后更新于 121 天前，其中的信息可能已经过时；如有错误，请发送邮件到 [email protected]。
修改一下 Cookie 就能用：把下面 head 中的 Cookie 换成你自己登录知乎后浏览器里的 Cookie，即可运行脚本。
import csv
import json
import os
import re

import requests
from bs4 import BeautifulSoup
# HTTP headers sent with the article page request.
#
# 'User-Agent' presents the request as a desktop Chrome browser.
# 'Cookie' is a captured browser session. Replace it with your own before
# running, and avoid publishing a live value: anyone holding it can presumably
# act as the logged-in account (NOTE(review): confirm and rotate if still valid).
#
# The cookie is written as adjacent string literals, which Python concatenates
# into one string; a raw line break inside a single-quoted literal is a
# SyntaxError.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'Cookie': (
        '__snaker__id=0pJjJb8IUhSdWX7B; SESSIONID=KoJOVuNTtbJ1hg4Iue1H1LR1QATBkybXTVQAvn0EZyO; JOID=VVsQAkicW-C9QOF1cZqr87Fhj0tqu37HnGfGUFa7fMWaYcZSVK-RAd1A43d3iM6oRJoc9IVLnywtSYuebBOpe8A=; osd=UF8dBE2ZX-27ReRxfJyu9rVsiU5vv3PBmWLCXVC-ecGXZ8NXUKKXBNhE7nFyjcqlQp8Z8IhNmikpRI2baRekfcU=; _xsrf=xTj76zeuXt9aX6jTYnISm9nyfeEvp6aL; _zap=5dce53b4-4ecb-41bf-8e40-6d52789f84f0; __zse_ck=003_bG9VTwugI/4lEuBx4XYoKcZuZWZ2QnPYjjWwmxOCY3wmUb2ITspDNJsmkovRUtSFprVvNw/nPH+5pu1mMVvFtcwBonl8gchV1M9cQt9GvxIs; d_c0=ADDSlSRoPhmPTm02tKq2RKtPqiSQXbDC_-k=|1726464779; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1726464780; HMACCOUNT=3ED68D61EAC0266B; SL_G_WPT_TO=zh-CN; SL_GWPT_Show_Hide_tmp=1; SL_wptGlobTipTmp=1; gdxidpyhxdE=zPkZyGZxKH88dtqgY%2FPB9l0YlxsnDl826uWpl2cCfjEUA8mwjCSZ5N8%5C9%2FE%2B1QW80GDdEWkbBPUr7arHf%5Crg0YM%5CuCMUsrY7NG%5CqikNf5Vzs3L0q8oZlalIBeeYvQp31nIdaYMNymc7ExL6zlgNKw0xD7T4RpemXrYyce3ufoq2WpKu1%3A1726466820589; captcha_session_v2=2|1:0|10:1726466005|18:captcha_session_v2|88:eDVJNjN4WWdTU2pzZktZdFdSeFVDeDE2NXFJa1ZFLzhpdkdaT1psdkp3Mmo3SnpjVUtEYVd3RGJkWHZuUUlicA==|9840f8e5c0f16879292c65d406db19ea7f70bc67d696efc43dc3f1a3bac553a0; captcha_ticket_v2=2|1:0|10:1726466017|17:captcha_ticket_v2|728:eyJ2YWxpZGF0ZSI6IkNOMzFfenRtMThxdW5CUkJ2YWkxLnVwNXIxdWZFWlZoLldqTjZ0VHJ6OXVBaWNacG5NTk12b3phTEVKVHdkT2tvNE9mNHNERCo1REtzQllNazJxRWRlMldzM3NTX2JlNHM0bHBuKlhlS3hzeWhzZipaKndEeHAxRE5GNThWS2Q2REUzaXBhSEQyaVZ3WFlHclgzOWhJY3VxTnpzRjllMXZCWXMzMnpneHFIZHlyMFM1Q1dxZXFRdXdiQUM5VC41Kk1aNDZRT1lEX0hYZkk4d0RjZW95MW1vcmlrNERoKmxXYXRSZUw2aFFfR05pMThFMnBPTjBUdHgwNkZXd0N6R1QwY185VXVDOGRfOF9HcGZxdlR5OE1KZm1HVGVjT0pNdDF0cnRLYWZVYlZYUmN4a2VQNWd1c0FNR2xHNWI5MHdYM1R2WW1rRVhUdlJ1czNSTUNqdlVqSTh1TDYzd0lfU0VvckdrX1RHZGExNXhtNnZVdEF3ODhjaXJldG1aU3BWeXkuQlRKeE1QNkJmQ1ZUQUEzUGRNLjVHeG42ZE8qWm4ySUEyNldaUXhRemRFVnJBVDhjRkFsVm9JT1FOMVA1cVdTMzJrczE0ZzVwQ0pPQWNyaVBzOTFNaEZ1VnhWWEpfZXRXVC5zcFZuNmNLdHpzZ2FzU25YdnZNaGVDWk1FSzBRX2dfKlJMWEpuaVk3N192X2lfMSJ9|5b6b9979a49a85a237fea98902feae54fdac976c185cf9392773fedd23c18c54; '
        'z_c0=2|1:0|10:1726466018|4:z_c0|92:Mi4xX0o0bE9BQUFBQUFBTU5LVkpHZy1HU1lBQUFCZ0FsVk40aFhWWndDbU9DTjgxY1JJbkZpeTliUjFUSDZSU1pKOW1B|5edac54d09d7373b7e80408e848673d88c329daca603e4bffff00aec8dd13c1a; tst=r; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1726466095; BEC=b7b0f394f3fd074c6bdd2ebbdd598b4e'
    ),
}
# Scrape one Zhihu column article and save every full-size image it embeds.
# Flow: fetch the page -> locate the article body -> collect the image URLs
# from each <img data-original="..."> -> download them into OUTPUT_DIR.

# Seconds to wait for a server response; without a timeout requests can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10
# Destination folder, relative to the current working directory.
OUTPUT_DIR = '../pythonspider/other/zhihu'

response = requests.get(
    url='https://zhuanlan.zhihu.com/p/402355727',
    headers=head,
    timeout=REQUEST_TIMEOUT,
)
# Fail on 4xx/5xx instead of parsing an error page as if it were the article.
response.raise_for_status()
response.encoding = 'utf-8'
text_soup = BeautifulSoup(response.text, 'html.parser')

# The article body is matched by its exact class string; find() returns None
# when no such <div> exists in the fetched page.
re_pose = text_soup.find('div', attrs={'class': 'RichText ztext Post-RichText css-1ygg4xu'})
if re_pose is None:
    raise SystemExit('未找到文章正文,请检查 Cookie 是否失效或页面结构是否变化')

# Collect the original-resolution URL of every matching image, skipping tags
# that have no data-original attribute (get() returns None for those).
photo_task = []
for img in re_pose.find_all('img', attrs={'class': 'origin_image zh-lightbox-thumb'}):
    src = img.get('data-original')
    if src:
        photo_task.append(src)

# Create the target folder up front so the first write cannot fail on a
# missing directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for j in photo_task:
    # The file name is the 4th '/'-separated piece of the URL (the first path
    # segment after the host) with everything from its first '.' removed.
    url_parts = j.split('/')
    if len(url_parts) < 4:
        print(f'{j}--跳过(无法解析文件名)')
        continue
    photo_split = url_parts[3].split('.')[0]

    # Download before opening the file so a failed request does not leave an
    # empty or error-page .jpg behind; one bad image does not stop the rest.
    try:
        image = requests.get(url=j, timeout=REQUEST_TIMEOUT)
        image.raise_for_status()
    except requests.RequestException as exc:
        print(f'{photo_split}--下载失败: {exc}')
        continue

    with open(f'{OUTPUT_DIR}/{photo_split}.jpg', mode='wb') as p:
        p.write(image.content)
    print(f'{photo_split}--下载完毕')