python 批量采集历年的每日一图到库中
批量采集历史所有的每日一词到库中
完整代码
# #采集词霸每日一句插入
from __future__ import print_function
import requests
from bs4 import BeautifulSoup
import os, sys
import time
import json
import datetime
import pymysql
import arrow
def isLeapYear(years):
    '''
    Return the number of days in the given year.

    Despite the name, the result is a day count rather than a boolean:
    366 when ``years`` is a Gregorian leap year, 365 otherwise.

    :param years: the year, int
    :return: days_sum, total number of days in that year (365 or 366)
    :raises AssertionError: if ``years`` is not an int
    '''
    # Raised explicitly (instead of via an ``assert`` statement) so the check
    # is not stripped under ``python -O``; the exception type and message stay
    # the same as before.
    if not isinstance(years, int):
        raise AssertionError("请输入整数年,如 2018")

    # Gregorian rule: every 4th year is a leap year, except century years,
    # unless the century year is also divisible by 400.
    is_leap = (years % 4 == 0 and years % 100 != 0) or (years % 400 == 0)
    days_sum = 366 if is_leap else 365
    return days_sum
def getAllDayPerYear(years):
    '''
    Return every calendar date of one year as "YYYY-MM-DD" strings.

    :param years: the year, as an int or anything ``int()`` accepts (e.g. '2020')
    :return: list of date strings in ascending order, from January 1st
             through December 31st of that year (365 or 366 entries)
    :raises ValueError: if ``years`` cannot be converted to an int or is
                        outside the range ``datetime.date`` supports
    '''
    year = int(years)
    first_day = datetime.date(year, 1, 1)
    # Distance from Jan 1 to Dec 31, plus one, is the length of the year;
    # this accounts for leap years without a separate calendar rule.
    days_sum = (datetime.date(year, 12, 31) - first_day).days + 1
    # date.isoformat() yields the zero-padded YYYY-MM-DD form.
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(days_sum)
    ]
# Endpoint queried once per calendar day; the date is sent as form field "date".
_DSAPI_URL = "http://open.iciba.com/dsapi/"

# Keys read from each API response when building the post row.
_REQUIRED_FIELDS = ('dateline', 'content', 'note', 'fenxiang_img')

# Category ids (`mid`) every inserted post is linked to. Without a category
# relation the front end does not display the post; look the ids up in your
# own category table.
_CATEGORY_IDS = (3, 4)

# Values that come from the API or the clock are bound as parameters so the
# driver quotes them; the remaining columns keep their fixed literals.
_INSERT_POST_SQL = '''
INSERT INTO imgcontents (
    title,
    slug,
    created,
    modified,
    text,
    `order`,
    authorId,
    template,
    type,
    `status`,
    `password`,
    commentsNum,
    allowComment,
    allowPing,
    allowFeed,
    parent
)
VALUES
(%s, %s, %s, %s, %s, 0, 1, '', 'post', 'publish', '', 0, 1, 1, 1, 0)
'''

_INSERT_RELATION_SQL = 'INSERT INTO imgrelationships(`cid`,`mid`) VALUES (%s, %s)'


def _build_post_text(entry):
    '''Render one API entry as the markdown body stored in the `text` column.'''
    # No manual quote escaping here: the text is passed as a bound parameter.
    return (
        '<!--markdown-->>' + entry['content']
        + '\r\n>' + entry['note']
        + '\r\n\r\n\r\n![每日一词][1]\r\n\r\n\r\n [1]:' + entry['fenxiang_img']
    ).strip()


def _insert_entry(db, cur, entry):
    '''Insert one post plus its category relations and commit; return the new post id.'''
    # One timestamp for slug/created/modified so the three columns agree.
    now = time.time()
    cur.execute(
        _INSERT_POST_SQL,
        (str(entry['dateline']), str(now), now, now, _build_post_text(entry)),
    )
    # Id of the post row that was just inserted.
    post_id = cur.lastrowid
    for category_id in _CATEGORY_IDS:
        cur.execute(_INSERT_RELATION_SQL, (post_id, category_id))
    # Commit per entry so a post and its relations are stored together and
    # earlier days survive a later failure.
    db.commit()
    return post_id


def main():
    '''Fetch the daily entry for every date of the configured years and store each one.'''
    years = ['2020', '2021', '2022']
    db = pymysql.connect(
        host="******",
        port=3306,
        user='*****',
        passwd='*****',
        db='*****',
        # The post text contains non-ASCII characters, so do not rely on the
        # driver's default connection charset.
        charset='utf8mb4',
    )
    try:
        cur = db.cursor(cursor=pymysql.cursors.DictCursor)
        try:
            for year in years:
                for day in getAllDayPerYear(year):
                    # A timeout keeps one stalled request from hanging the whole run.
                    resp = requests.post(_DSAPI_URL, data={"date": day}, timeout=10)
                    resp.raise_for_status()
                    entry = resp.json()

                    # NOTE(review): presumably dates without an entry come back
                    # without these keys -- verify against the API. Skip such
                    # days instead of aborting the remaining dates.
                    if not isinstance(entry, dict):
                        print('skip %s: unexpected response payload' % day)
                        continue
                    missing = [key for key in _REQUIRED_FIELDS if key not in entry]
                    if missing:
                        print('skip %s: response lacks %s' % (day, ', '.join(missing)))
                        continue

                    post_id = _insert_entry(db, cur, entry)
                    print('inserted %s (cid=%s)' % (day, post_id))
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any half-written entry.
        db.close()


if __name__ == '__main__':
    main()