阿里云函数计算使用 Selenium 进行网页截图的完整代码示例
# -*- coding: utf-8 -*-
# tools.bugscaner.com
import logging
import hashlib
import json
import time
import oss2
from selenium import webdriver
from cgi import parse_qs, escape
def _json_response(start_response, payload):
    """Start a ``200 OK`` plain-text response and return *payload* as a JSON body.

    The service reports success or failure through the ``status`` field of the
    JSON document, so the HTTP status line is always ``200 OK``.
    """
    start_response('200 OK', [('Content-type', 'text/plain')])
    # WSGI bodies must be byte strings; json.dumps emits ASCII by default, so
    # this encode is safe on both Python 2 and Python 3.
    return [json.dumps(payload).encode('utf-8')]


def _read_target_url(environ):
    """Return the ``url`` field of the form-encoded POST body, or ``''``."""
    # Function-local import so the helper works on Python 3 (where
    # cgi.parse_qs no longer exists) as well as on Python 2.
    try:
        from urllib.parse import parse_qs
    except ImportError:  # Python 2
        from urlparse import parse_qs

    try:
        request_body_size = int(environ.get('CONTENT_LENGTH', 0))
    except (TypeError, ValueError):
        # Header missing, empty or malformed: treat as an empty body.
        request_body_size = 0
    request_body = environ['wsgi.input'].read(request_body_size)
    if isinstance(request_body, bytes):
        # Decode first so the parsed keys are text and "url" can match.
        request_body = request_body.decode('utf-8', 'replace')
    input_urls = parse_qs(request_body)
    logging.warning(input_urls)
    return input_urls.get("url", [''])[0]


def _capture_screenshot(url):
    """Render *url* in headless Chrome and return a full-page PNG.

    Returns ``None`` when the page cannot be loaded (invalid URL, timeout,
    navigation error).  The browser is always shut down before returning.
    """
    # Chrome settings.
    option = webdriver.ChromeOptions()
    option.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"')
    option.add_argument("--headless")
    option.add_argument("--no-sandbox")
    option.add_argument("--disable-gpu")
    option.binary_location = "./headless-chromium"
    driver = webdriver.Chrome(executable_path="./chromedriver", chrome_options=option)
    try:
        # Page access timeouts; per the original author both settings must be
        # applied for the limit to take effect.
        driver.set_page_load_timeout(5)
        driver.set_script_timeout(5)
        try:
            driver.get(url)
        except Exception:
            logging.exception("failed to load %s", url)
            return None
        # Resize the window to the document's scroll size so the screenshot
        # covers the whole page rather than just the initial viewport.
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)
        return driver.get_screenshot_as_png()
    finally:
        # Without this every invocation leaves a Chrome process behind.
        driver.quit()


def _upload_png(object_name, png_bytes):
    """Upload *png_bytes* to the OSS bucket as *object_name*; return success.

    The upload is attempted at most twice.
    """
    import os

    # NOTE(security): credentials should not live in source code.  The
    # literals are kept only as backward-compatible defaults; set
    # OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET in the function's environment
    # to override them.
    auth = oss2.Auth(
        os.environ.get("OSS_ACCESS_KEY_ID", "LT33HkjkEz"),
        os.environ.get("OSS_ACCESS_KEY_SECRET", "33Ns4AjdvG1Ow2sxvtzmjcGbvKtECX"),
    )
    bucket = oss2.Bucket(auth, 'oss-cn-hangzhou.aliyuncs.com', 'pic')
    for attempt in range(2):
        try:
            bucket.put_object(object_name, png_bytes)
            return True
        except Exception:
            logging.exception("OSS upload attempt %d failed", attempt + 1)
    return False


def handler(environ, start_response):
    """WSGI entry point: screenshot the posted URL and store it in OSS.

    The request is a form-encoded POST with a ``url`` field.  The page is
    rendered in headless Chrome, captured as a full-page PNG and uploaded to
    OSS under ``<md5(url)>.png``.

    The response is always ``200 OK`` with a JSON body:

    * ``{"status": 200, "picurl": <md5 hex>}`` -- screenshot uploaded;
    * ``{"status": 404}`` -- no URL supplied, or the page failed to load;
    * ``{"status": 403}`` -- the upload to OSS failed twice.
    """
    input_url = _read_target_url(environ)
    logging.warning(input_url)
    if not input_url:
        # Nothing to render: fail fast instead of starting a browser.
        return _json_response(start_response, {"status": 404})

    images = _capture_screenshot(input_url)
    if images is None:
        return _json_response(start_response, {"status": 404})

    # Hash the UTF-8 bytes of the URL; hashlib rejects text on Python 3.
    filenamemd5 = hashlib.md5(input_url.encode('utf-8')).hexdigest()
    if _upload_png(filenamemd5 + ".png", images):
        return _json_response(start_response, {"status": 200, "picurl": filenamemd5})
    return _json_response(start_response, {"status": 403})
截图实例地址:http://tools.bugscaner.com/urltopic.html




