阿里云函数计算使用 Selenium 进行网页截图的完整代码示例
# -*- coding: utf-8 -*-
# tools.bugscaner.com
"""Function Compute HTTP handler that turns a URL into a PNG screenshot.

The handler reads a form-encoded request body containing a ``url`` field,
renders that page in headless Chrome through Selenium, uploads the PNG to an
OSS bucket under the MD5 hex digest of the URL, and answers with a small JSON
document whose ``status`` field is 200, 403 or 404.

NOTE(review): ``cgi.parse_qs`` and hashing a ``str`` directly suggest this was
written for a Python 2.7 runtime -- confirm before porting to Python 3.
"""
import hashlib
import json
import logging
import os
import time
from cgi import parse_qs, escape

import oss2
from selenium import webdriver

# Chrome switches, passed to ChromeOptions in this order. The user-agent value
# is kept exactly as the original script spelled it.
_CHROME_ARGS = (
    'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"',
    "--headless",
    "--no-sandbox",
    "--disable-gpu",
)
_CHROME_BINARY = "./headless-chromium"
_CHROMEDRIVER_PATH = "./chromedriver"

# Page-load and script timeout, in seconds.
_TIMEOUT_SECONDS = 5

# How many times the upload is attempted before giving up.
_UPLOAD_ATTEMPTS = 2

_OSS_ENDPOINT = 'oss-cn-hangzhou.aliyuncs.com'
_OSS_BUCKET = 'pic'

# SECURITY: these literals are the credentials the original script embedded.
# They are kept only as a backward-compatible fallback; set the environment
# variables OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET instead and rotate any
# key that has ever been committed to source control.
_DEFAULT_OSS_KEY_ID = "LT33HkjkEz"
_DEFAULT_OSS_KEY_SECRET = "33Ns4AjdvG1Ow2sxvtzmjcGbvKtECX"


def _json_response(start_response, payload):
    """Start a ``200 OK`` text/plain response and return *payload* as its body.

    The HTTP status is always 200; the outcome is reported in the JSON
    ``status`` field, as in the original script.
    """
    start_response('200 OK', [('Content-type', 'text/plain')])
    return [json.dumps(payload)]


def _read_target_url(environ):
    """Return the ``url`` field of the form-encoded request body, or ``''``."""
    try:
        body_size = int(environ.get('CONTENT_LENGTH', 0))
    except (TypeError, ValueError):
        # Missing, empty or malformed Content-Length: treat the body as empty.
        body_size = 0
    request_body = environ['wsgi.input'].read(body_size)
    form = parse_qs(request_body)
    logging.warning(form)
    target_url = form.get("url", [''])[0]
    logging.warning(target_url)
    return target_url


def _capture_screenshot(url):
    """Render *url* in headless Chrome and return the page as PNG data.

    Returns ``None`` when the page cannot be loaded (for example on timeout).
    The browser is always shut down before returning, so no Chrome process
    outlives the call.
    """
    option = webdriver.ChromeOptions()
    for argument in _CHROME_ARGS:
        option.add_argument(argument)
    option.binary_location = _CHROME_BINARY
    driver = webdriver.Chrome(executable_path=_CHROMEDRIVER_PATH,
                              chrome_options=option)
    try:
        # Original author's note: both timeouts must be set for the limit to
        # actually take effect.
        driver.set_page_load_timeout(_TIMEOUT_SECONDS)
        driver.set_script_timeout(_TIMEOUT_SECONDS)
        try:
            driver.get(url)
        except Exception:
            logging.exception("could not load %r", url)
            return None
        # Grow the window to the full document size so the screenshot covers
        # the whole page rather than just the initial viewport.
        scroll_width = driver.execute_script(
            'return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script(
            'return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)
        return driver.get_screenshot_as_png()
    finally:
        driver.quit()


def _upload_png(object_name, png_data):
    """Upload *png_data* to the OSS bucket; return ``True`` on success.

    The upload is tried up to ``_UPLOAD_ATTEMPTS`` times and every failure is
    logged instead of being silently discarded.
    """
    auth = oss2.Auth(
        os.environ.get("OSS_ACCESS_KEY_ID", _DEFAULT_OSS_KEY_ID),
        os.environ.get("OSS_ACCESS_KEY_SECRET", _DEFAULT_OSS_KEY_SECRET),
    )
    bucket = oss2.Bucket(auth, _OSS_ENDPOINT, _OSS_BUCKET)
    for attempt in range(1, _UPLOAD_ATTEMPTS + 1):
        try:
            bucket.put_object(object_name, png_data)
        except Exception:
            logging.exception("upload attempt %d of %d failed for %s",
                              attempt, _UPLOAD_ATTEMPTS, object_name)
        else:
            return True
    return False


def handler(environ, start_response):
    """WSGI entry point: screenshot the posted URL and store it in OSS.

    Args:
        environ: WSGI environment; ``CONTENT_LENGTH`` and ``wsgi.input`` are
            read to obtain the form-encoded body and its ``url`` field.
        start_response: WSGI ``start_response`` callable. It is called exactly
            once on every return path.

    Returns:
        A one-element list holding a JSON document:
        ``{"status": 200, "picurl": <md5 hex of the url>}`` after a successful
        upload (the object is stored as ``<md5>.png``), ``{"status": 404}``
        when the URL is missing or the page cannot be loaded, and
        ``{"status": 403}`` when every upload attempt fails.
    """
    # NOTE(review): the URL comes straight from the request and is opened by a
    # real browser; consider restricting schemes/hosts if callers are untrusted.
    input_url = _read_target_url(environ)
    if not input_url:
        # Nothing to render: answer without paying for a browser launch.
        return _json_response(start_response, {"status": 404})

    images = _capture_screenshot(input_url)
    if images is None:
        return _json_response(start_response, {"status": 404})

    # hashlib needs bytes; a native Python 2 str already is bytes.
    if isinstance(input_url, bytes):
        url_bytes = input_url
    else:
        url_bytes = input_url.encode("utf-8")
    filenamemd5 = hashlib.md5(url_bytes).hexdigest()

    if _upload_png(filenamemd5 + ".png", images):
        return _json_response(
            start_response, {"status": 200, "picurl": filenamemd5})
    return _json_response(start_response, {"status": 403})
截图实例地址:http://tools.bugscaner.com/urltopic.html