python进阶-04课程源码
资源文件列表(大概)
__pycache__/main.cpython-36.pyc
351B
api/__pycache__/__init__.cpython-36.pyc
100B
api/crawler/__pycache__/
-
api/crawler/__pycache__/fanyi.cpython-36.pyc
2.97KB
api/crawler/__pycache__/params.cpython-36.pyc
372B
api/crawler/fanyi.py
5.19KB
api/crawler/params.py
200B
api/crawler/text.html
12.97KB
api/system/__pycache__/user.cpython-36.pyc
3.54KB
api/system/user/__pycache__/
-
api/system/user/__pycache__/params.cpython-36.pyc
650B
api/system/user/__pycache__/user.cpython-36.pyc
3.1KB
api/system/user/params.py
310B
api/system/user/user.py
3.96KB
tutorial/geckodriver.log
34.63KB
tutorial/log/scrapy_2024_11_25.log
98.5KB
tutorial/log/scrapy_2024_11_26.log
49.63KB
tutorial/quotes-1.html
10.8KB
tutorial/quotes-2.html
13.42KB
tutorial/quotes.jsonlines
10.84KB
tutorial/tutorial/__init__.py
-
tutorial/tutorial/__pycache__/
-
tutorial/tutorial/__pycache__/__init__.cpython-36.pyc
146B
tutorial/tutorial/__pycache__/items.cpython-36.pyc
732B
tutorial/tutorial/__pycache__/pipelines.cpython-36.pyc
612B
tutorial/tutorial/__pycache__/save_Image_pipeline.cpython-36.pyc
1.01KB
tutorial/tutorial/__pycache__/settings.cpython-36.pyc
2.87KB
tutorial/tutorial/__pycache__/text_download_pipeline.cpython-36.pyc
1.09KB
tutorial/tutorial/__pycache__/video_download_pipeline.cpython-36.pyc
1.58KB
tutorial/tutorial/items.py
754B
tutorial/tutorial/middlewares.py
3.57KB
tutorial/tutorial/pipelines.py
362B
tutorial/tutorial/save_Image_pipeline.py
1.24KB
tutorial/tutorial/settings.py
6.4KB
tutorial/tutorial/spiders/
-
tutorial/tutorial/spiders/__init__.py
161B
tutorial/tutorial/spiders/__pycache__/
-
tutorial/tutorial/spiders/__pycache__/__init__.cpython-36.pyc
154B
tutorial/tutorial/spiders/__pycache__/quotes_spider.cpython-36.pyc
2.38KB
tutorial/tutorial/spiders/__pycache__/xbiqugu.cpython-36.pyc
1.7KB
tutorial/tutorial/spiders/dload_files.py
1.05KB
tutorial/tutorial/spiders/quotes_spider.py
3.58KB
tutorial/tutorial/spiders/xbiqugu.py
1.54KB
tutorial/tutorial/text_download_pipeline.py
1.27KB
tutorial/tutorial/video_download_pipeline.py
1.36KB
utils/__pycache__/__init__.cpython-312.pyc
148B
utils/__pycache__/__init__.cpython-36.pyc
134B
utils/__pycache__/config_helper.cpython-36.pyc
862B
utils/__pycache__/module1.cpython-312.pyc
270B
utils/__pycache__/module1.cpython-36.pyc
250B
utils/__pycache__/module2.cpython-312.pyc
270B
utils/__pycache__/module2.cpython-36.pyc
250B
utils/__pycache__/orm_helper.cpython-36.pyc
6.75KB
utils/__pycache__/pymysql_helper.cpython-36.pyc
2.37KB
utils/config_helper.py
867B
utils/orm_helper.py
5.83KB
utils/pymysql_helper.py
2.86KB
资源内容介绍
# Scrapy settings for the "tutorial" project (python advanced course, lesson 04).
#
# Only commonly used settings are kept here. Full reference:
#   https://docs.scrapy.org/en/latest/topics/settings.html
#   https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#   https://docs.scrapy.org/en/latest/topics/spider-middleware.html
import os
import random
from datetime import datetime

BOT_NAME = 'tutorial'

SPIDER_MODULES = ['tutorial.spiders']
NEWSPIDER_MODULE = 'tutorial.spiders'

# Deliberately ignore robots.txt for this teaching project.
ROBOTSTXT_OBEY = False

# Pause between requests to the same site, in seconds (0 = no throttling).
DOWNLOAD_DELAY = 0

# Item pipelines; only the text-download pipeline is currently enabled.
ITEM_PIPELINES = {
    # 'tutorial.pipelines.TutorialPipeline': 300,
    # 'tutorial.save_Image_pipeline.SaveImagePipeline': 300,
    # 'tutorial.video_download_pipeline.VideoDownloadPipeline': 500,
    'tutorial.text_download_pipeline.TextDownloadPipeline': 300,
}

# ---------------------------------------------------------------- images ----
# Days before downloaded images are considered expired.
# (Dedicated image servers such as FastDFS or TFS would be the production choice.)
IMAGES_EXPIRES = 90
# IMAGES_THUMBS = {'small': (50, 50), 'big': (270, 270)}

# Local image store (Windows path used in the course environment).
IMAGES_STORE = 'F:\\zhaoxi_project\\base_project\\part2_spider\\day04\\toscrape\\images'
if not os.path.exists(IMAGES_STORE):
    os.makedirs(IMAGES_STORE)
# On Linux this would be e.g.: IMAGES_STORE = '/opt/images'

# --------------------------------------------------------------- logging ----
# Severity order (low to high): DEBUG < INFO < WARNING < ERROR.
# Messages at or above LOG_LEVEL are written; production typically uses ERROR.
LOG_LEVEL = "INFO"

LOG_DIR = "log"
if not os.path.exists(LOG_DIR):
    os.makedirs(LOG_DIR)

# One log file per calendar day (month/day are NOT zero-padded, matching the
# existing log filenames, e.g. scrapy_2024_11_25.log).
today = datetime.now()
LOG_FILE = f"{LOG_DIR}/scrapy_{today.year}_{today.month}_{today.day}.log"

# ----------------------------------------------------------- user agents ----
USER_AGENT_LIST = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]
# NOTE(review): one UA is chosen per process at import time, not rotated per
# request — per-request rotation would need a downloader middleware.
USER_AGENT = random.choice(USER_AGENT_LIST)