阿里云函数计算使用 Selenium 进行网页截图的完整代码示例

# -*- coding: utf-8 -*-
# tools.bugscaner.com
import logging
import hashlib
import json
import time
import oss2
from selenium import webdriver
from cgi import parse_qs, escape


def _json_response(start_response, payload):
    """Start a ``200 OK`` text/plain response and return *payload* as JSON.

    The HTTP status line is always ``200 OK``; the outcome is carried by the
    ``status`` field inside the JSON document, as in the original handler.

    Args:
        start_response: The WSGI ``start_response`` callable.
        payload: A JSON-serialisable object to send as the response body.

    Returns:
        A single-element list holding the encoded JSON body.
    """
    body = json.dumps(payload)
    # WSGI (PEP 3333) bodies must be byte strings. On Python 2 ``str`` is
    # already bytes, so the encode only happens on Python 3.
    if not isinstance(body, bytes):
        body = body.encode('utf-8')
    start_response('200 OK', [('Content-type', 'text/plain')])
    return [body]


def handler(environ, start_response):
    """WSGI entry point: screenshot the posted URL and upload the PNG to OSS.

    The request body is parsed as a URL-encoded form and its ``url`` field is
    loaded in headless Chrome. The window is resized to the document's scroll
    size, a PNG screenshot is taken, and the image is stored in the OSS bucket
    under ``<md5 of url>.png``.

    Args:
        environ: The WSGI environment; ``wsgi.input`` and ``CONTENT_LENGTH``
            are read from it.
        start_response: The WSGI ``start_response`` callable.

    Returns:
        A single-element list with a JSON body:
        ``{"status": 200, "picurl": <md5>}`` on success,
        ``{"status": 404}`` when the URL is missing or the page fails to load,
        ``{"status": 403}`` when both upload attempts fail.
    """
    # cgi.parse_qs was removed in Python 3.8; use the maintained location and
    # fall back to the Python 2 module so the handler runs on either runtime.
    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs  # Python 2

    # Read the POST parameters.
    try:
        request_body_size = int(environ.get('CONTENT_LENGTH', 0))
    except (TypeError, ValueError):
        request_body_size = 0
    request_body = environ['wsgi.input'].read(request_body_size)
    # On Python 3 the body arrives as bytes; decode so the "url" key matches.
    if not isinstance(request_body, str):
        request_body = request_body.decode('utf-8', 'replace')
    input_urls = parse_qs(request_body)
    logging.warning(input_urls)
    input_url = input_urls.get("url", [''])[0]
    logging.warning(input_url)

    # Nothing to render: answer right away instead of starting a browser only
    # to have navigation fail.
    if not input_url:
        return _json_response(start_response, {"status": 404})

    # NOTE(review): input_url is caller-controlled and handed straight to the
    # browser; consider restricting it to http/https targets.

    # Chrome browser settings.
    option = webdriver.ChromeOptions()
    option.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"')
    option.add_argument("--headless")
    option.add_argument("--no-sandbox")
    option.add_argument("--disable-gpu")
    option.binary_location = "./headless-chromium"
    driver = webdriver.Chrome(executable_path="./chromedriver", chrome_options=option)
    try:
        # Page access timeouts; the original author notes that both must be
        # set for the limit to take effect.
        driver.set_page_load_timeout(5)
        driver.set_script_timeout(5)
        try:
            driver.get(input_url)
        except Exception:
            # Any navigation failure is reported to the client as 404.
            logging.exception("failed to load %s", input_url)
            return _json_response(start_response, {"status": 404})
        # Resize the window to the full document so the screenshot covers
        # the whole page rather than just the initial viewport.
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)
        images = driver.get_screenshot_as_png()
    finally:
        # Always shut the browser down so no Chrome process outlives the call.
        driver.quit()

    # NOTE(review): OSS credentials are hard-coded in source; they should be
    # supplied by the runtime (environment / function context) instead.
    auth = oss2.Auth("LT33HkjkEz", "33Ns4AjdvG1Ow2sxvtzmjcGbvKtECX")
    bucket = oss2.Bucket(auth, 'oss-cn-hangzhou.aliyuncs.com', 'pic')

    # hashlib needs bytes: encode text on Python 3, pass Python 2 str through.
    if isinstance(input_url, bytes):
        url_bytes = input_url
    else:
        url_bytes = input_url.encode('utf-8')
    filenamemd5 = hashlib.md5(url_bytes).hexdigest()

    # Try the upload at most twice before giving up.
    for attempt in range(2):
        try:
            bucket.put_object(filenamemd5 + ".png", images)
        except Exception:
            logging.warning("OSS upload attempt %d failed", attempt + 1, exc_info=True)
        else:
            return _json_response(start_response, {"status": 200, "picurl": filenamemd5})
    return _json_response(start_response, {"status": 403})

截图实例地址:http://tools.bugscaner.com/urltopic.html

您可能还会对下面的文章感兴趣: