import requests import json from bs4 import BeautifulSoup
# Job-search endpoint of zhaopin.com's front-end API, with the whole query
# baked in: 90 results per page, cityId 489, and the URL-encoded keyword
# "大数据" ("big data"). Adjacent literals are concatenated by the parser, so
# the value is a single string with no separators added.
url = (
    'https://fe-api.zhaopin.com/c/i/sou'
    '?pageSize=90'
    '&cityId=489'
    '&workExperience=-1'
    '&education=-1'
    '&companyType=-1'
    '&employmentType=-1'
    '&jobWelfareTag=-1'
    '&kw=%E5%A4%A7%E6%95%B0%E6%8D%AE'
    '&kt=3'
    '&_v=0.79005936'
    '&x-zp-page-request-id=520adc5dcbde404f8f20d5c0846b54b5-1562324160122-21643'
    '&x-zp-client-id=0d243d91-4d7b-43f7-9551-07854f531ab2'
)

# Request headers carrying a desktop Chrome User-Agent string.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    ),
}
def _format_job(result):
    """Render one job record from the search response as a text paragraph.

    Args:
        result: One element of ``data.results`` in the decoded JSON response.

    Returns:
        Eight ``label:value`` lines followed by two blank lines.

    Raises:
        KeyError, IndexError: If the record lacks one of the fields read here.
    """
    fields = (
        ('工作名称:', result["jobName"]),
        ('公司名称:', result['company']['name']),
        # Use the company's own URL from the record; the previous code
        # wrote the module-level search-API `url` on this line by mistake.
        ('公司网站:', result['company']['url']),
        ('更新日期:', result["updateDate"]),
        ('薪水:', result['salary']),
        ('工作类型:', result["jobType"]["items"][0]["name"]),
        ('工作经验:', result["workingExp"]['name']),
        ('福利:', result["welfare"]),
    )
    lines = ['{}{}\n'.format(label, value) for label, value in fields]
    # Two extra newlines separate consecutive job paragraphs.
    return ''.join(lines) + '\n\n'


def spider_job_content():
    """Download the job-search results and save them to ``spider_job.txt``.

    Sends a GET request to the module-level ``url`` with the module-level
    ``header``, prints the HTTP status code, and on a 200 response decodes
    the JSON body and writes one formatted paragraph per entry of
    ``data.results`` to ``spider_job.txt`` (UTF-8, overwriting).

    On any other status code nothing is parsed or written.

    Raises:
        KeyError, IndexError: If the response JSON lacks an expected field.
    """
    response = requests.get(url=url, headers=header)
    print(response.status_code)
    if response.status_code != 200:
        # Nothing to parse; leave any existing output file untouched.
        return

    response.encoding = 'utf-8'
    payload = json.loads(response.text)

    # Build the full text before opening the file so that a malformed
    # record cannot leave a truncated, half-written output file behind.
    text = ''.join(_format_job(job) for job in payload['data']['results'])

    with open('spider_job.txt', 'w', encoding='utf-8') as out_file:
        out_file.write(text)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    spider_job_content()

# Teacher's code:
import csv import requests import json from lxml import etree
class ZhiLian: def __init__(self): self.headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}