python 批量采集历年的每日一图到库中

python 批量采集历年的每日一图到库中

批量采集历史所有的每日一词到库中

完整代码

# Collect the iciba (词霸) "daily sentence" and insert it into the database
from __future__ import print_function
import requests
from bs4 import BeautifulSoup
import os, sys
import time
import json
import datetime
import pymysql
import arrow

def isLeapYear(years):
    '''
    Return the number of days in the given year.

    Despite its name, this function does not return a boolean: it applies the
    Gregorian leap-year rule and returns the day count of that year.

    :param years: the year, as an int
    :return: 366 if ``years`` is a leap year, otherwise 365
    '''
    # Assertion: a non-integer year is rejected with an AssertionError.
    assert isinstance(years, int), "请输入整数年,如 2018"

    # Leap year: divisible by 400, or divisible by 4 but not by 100.
    is_leap = years % 400 == 0 or (years % 100 != 0 and years % 4 == 0)
    return 366 if is_leap else 365
 
 
def getAllDayPerYear(years):
    '''
    Build the list of every calendar date in one year.

    The dates are computed with the standard-library ``datetime`` module, so
    leap years are handled by the calendar arithmetic itself and nothing is
    written to stdout.

    :param years: the year, as an int or any value ``int()`` accepts (e.g. '2020')
    :return: list of 'YYYY-MM-DD' strings, from January 1 through December 31
        of that year, in chronological order
    :raises ValueError: if ``years`` is not a valid integer string or lies
        outside the year range supported by ``datetime.date``
    '''
    year = int(years)
    first_day = datetime.date(year, 1, 1)
    last_day = datetime.date(year, 12, 31)
    # Inclusive span: 365 days, or 366 in a leap year.
    days_sum = (last_day - first_day).days + 1
    # date.isoformat() yields the zero-padded 'YYYY-MM-DD' form.
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(days_sum)
    ]



if __name__ == '__main__':
    # Back-fill script: for every day of the listed years, fetch that day's
    # entry from the iciba "dsapi" endpoint, insert it as a post row into
    # `imgcontents`, and link the new row to the categories below through
    # `imgrelationships`.

    # Years to back-fill; every calendar day of each year is requested.
    years = ['2020', '2021', '2022']

    # Category ids (`mid`) attached to every inserted post. Without a category
    # the post is not shown on the front end; look up the ids that apply to
    # your own installation in the database.
    category_ids = (3, 4)

    # All variable values are passed as query parameters (%s placeholders) so
    # the driver does the quoting; text containing quotes or backslashes can
    # neither break the statement nor inject SQL.
    insert_post_sql = '''
        INSERT INTO imgcontents (
            title,
            slug,
            created,
            modified,
            text,
            `order`,
            authorId,
            template,
            type,
            `status`,
            `password`,
            commentsNum,
            allowComment,
            allowPing,
            allowFeed,
            parent
        )
        VALUES
            (%s, %s, %s, %s, %s, 0, 1, '', 'post', 'publish', '', 0, 1, 1, 1, 0)
    '''
    insert_relation_sql = 'INSERT INTO imgrelationships(`cid`,`mid`) VALUES(%s, %s)'

    db = pymysql.connect(
        host="******",
        port=3306,
        user='*****',
        passwd='*****',
        db='*****'
    )
    cur = db.cursor(cursor=pymysql.cursors.DictCursor)

    try:
        for year in years:
            for day in getAllDayPerYear(year):
                # A timeout keeps one stalled request from hanging the batch.
                resp = requests.post(
                    "http://open.iciba.com/dsapi/",
                    data={"date": day},
                    timeout=30,
                )
                # Fail on an HTTP error here rather than on a confusing
                # JSON-decoding error further down.
                resp.raise_for_status()
                json_obj = json.loads(resp.content)

                # Markdown body: the sentence and its note as block quotes,
                # followed by a reference-style image link to the share image.
                text = (
                    '<!--markdown-->>' + json_obj['content']
                    + '\r\n>' + json_obj['note']
                    + '\r\n\r\n\r\n![每日一词][1]\r\n\r\n\r\n  [1]:'
                    + json_obj['fenxiang_img']
                ).strip()

                # One timestamp per post, so slug, created and modified agree.
                now = time.time()
                cur.execute(
                    insert_post_sql,
                    (str(json_obj['dateline']), str(now), now, now, text),
                )

                # Id of the post that was just inserted.
                lastid = cur.lastrowid
                for mid in category_ids:
                    cur.execute(insert_relation_sql, (lastid, mid))
                print('inserted', day)

        # Single commit at the end: if anything above raised, the connection
        # is closed without committing and the partial batch is not kept
        # (on a transactional storage engine).
        db.commit()
    finally:
        cur.close()
        db.close()

              
添加新评论