您现在的位置是:技术吧
  • Excel快速计算排名

    注意使用$标识绝对引用(向下填充公式时,排名区域保持不变)

    示例下载:计算排名.xlsx

    【排名A】相同成绩计算排名

    比如第【3】名共有三个,接下来是第【6】名:RANK(B2,$B$2:$B$11)

    【排名B】相同成绩不占用名次

    比如第【3】名共有三个,接下来是第【4】名:SUMPRODUCT(($B$2:$B$11>B2)/COUNTIF($B$2:$B$11,$B$2:$B$11))+1


    Excel快速计算排名.png

    yuli0927 2019年08月13日 标签: Excel分类: Office

  • Excel计算年龄周岁和虚岁

    示例下载:计算周岁和虚岁.xlsx

    假设A4是出生日期

    周岁:DATEDIF(A4,TODAY(),"Y")

    虚岁:YEAR(TODAY())-YEAR(A4)

    Excel计算年龄周岁和虚岁.png


    yuli0927 2019年08月13日 标签: Excel分类: Office

  • Python分析网页并下载图片

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import requests
    import re
    import os
    import urllib.request
    
    from lxml import etree
    import html
    
    # Directory tree that is walked to find the saved pages.
    ROOT_DIR = 'dalian/'
    # Directory the downloaded images are written to.
    IMG_DIR = ROOT_DIR + 'images/'
    
    ## Extract images
    def openOrder(ORDER_FILE):
        """Download the images referenced by a saved HTML page.

        Reads ORDER_FILE, collects every ``src`` attribute found under an
        ``<a><img>`` element, and stores each image whose URL answers with
        HTTP 200 in IMG_DIR under the basename of that URL.

        Args:
            ORDER_FILE: Path of the HTML file to scan.
        """
        print(ORDER_FILE)
        # The context manager closes the file even when read() raises.
        with open(ORDER_FILE, "r") as fo:
            FILE_TEXT = fo.read()
        # Collect the image addresses.
        html_text = etree.HTML(FILE_TEXT)
        img_data = html_text.xpath("//a/img//@src")
        if img_data:
            # The target directory may not exist on a first run.
            os.makedirs(IMG_DIR, exist_ok=True)
        for IMG_URL in img_data:
            print(IMG_URL)
            IMG_NAME = os.path.basename(IMG_URL)
            print(IMG_NAME)
            response = requests.get(IMG_URL)
            # Only keep images the server actually delivered.
            if response.status_code == 200:
                # Reuse the body that was just fetched instead of
                # downloading the same URL a second time.
                with open(IMG_DIR + IMG_NAME, "wb") as img_file:
                    img_file.write(response.content)
    
    
    # Walk ROOT_DIR and hand every file whose name starts with 'app'
    # to openOrder.
    for maindir, subdir, file_name_list in os.walk(ROOT_DIR):
        for file_name in file_name_list:
            if not file_name.startswith('app'):
                continue
            # Full path of the matching file.
            ORDER_FILE = os.path.join(maindir, file_name)
            openOrder(ORDER_FILE)

    yuli0927 2019年08月09日 标签: Python分类: Python

  • Python分析网页并抓取内容

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import requests
    import re
    import os
    
    from lxml import etree
    import html
    
    # Prefix of the index/log file names and name of the output directory.
    CITY = 'city'
    # Credentials handed to getSession.
    # NOTE(review): these look like placeholder values - confirm before running.
    USERNAME = 'user'
    PASSWORD = 'name'
    
    # Logged-in session
    def getSession(username, password):
        """Post the login form and return the session that sent it.

        Args:
            username: Value sent as the ``ID`` form field.
            password: Value sent as the ``PWD`` form field.

        Returns:
            The ``requests.Session`` used for the login request.
        """
        session = requests.Session()
        session.post(
            'http://www.test.com/index.php?ajax=1',
            data={
                "com": 'com_passport',
                "method": 'dologin',
                "ID": username,
                "PWD": password,
                "checkbox": 'on',
            },
            # Browser-like User-Agent header.
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            },
        )
        return session
    
    # List page URL
    def getPageUrl(num):
        """Return the URL of order list page ``num``.

        Args:
            num: Page number; it is converted to text and appended to the
                base URL.
        """
        return '{}{}'.format(
            'http://www.test.com/index.php?method=index&app=order&page=',
            str(num),
        )
        
    # Save a file
    def saveFile(text, name):
        """Write ``text`` to the file ``name``, replacing any existing content.

        Args:
            text: String to write.
            name: Path of the file to create or overwrite.
        """
        # The context manager closes the handle even when write() raises.
        with open(name, "w") as fo:
            fo.write(text)
    
    
    # Log record
    def addLog(type, text):
        """Append ``text`` as one line to the log file ``<CITY>_<type>.txt``.

        Args:
            type: Log category used in the file name. The name shadows the
                builtin but is kept so existing callers keep working.
            text: Message to append; a newline is added after it.
        """
        ERR_FILE = CITY + '_' + type + '.txt'
        # The context manager closes the handle even when write() raises.
        with open(ERR_FILE, "a") as ferror:
            ferror.write(text + "\n")
    
    
    
    ##########################################################
    
    # City index file
    FILE_NAME = CITY + '_index.html'
    # Read the index; the context manager closes the file even on error.
    with open(FILE_NAME, 'r', encoding='utf-8') as fo:
        text = fo.read()


    # Log in
    SESSION = getSession(USERNAME, PASSWORD)

    # Pages are saved under CITY/, which may not exist on a first run.
    os.makedirs(CITY, exist_ok=True)

    # Extract the URLs.
    # The tree is named html_tree so the imported `html` module is not rebound.
    html_tree = etree.HTML(text)
    url_data = html_tree.xpath('//a/@href')
    for ORDER_URL in url_data:
        ORDER_RES = SESSION.get(ORDER_URL)
        # Log every attempted URL.
        addLog('order_log', ORDER_URL)
        print(ORDER_URL)
        print(ORDER_RES.status_code)
        if ORDER_RES.status_code != 200:
            addLog('order_error', '获取'+ ORDER_URL +'失败')
            continue
        addLog('success', '获取'+ ORDER_URL +'成功')
        ORDER_TEXT = ORDER_RES.text
        # The file name is the part of the URL starting at "app=order".
        matchObj = re.search(r"app=order(.*)", ORDER_URL)
        if matchObj is None:
            # Links without "app=order" have no usable name; record and skip
            # them instead of failing on matchObj.group().
            addLog('order_error', '无法从'+ ORDER_URL +'提取文件名')
            continue
        ORDER_NAME = matchObj.group()
        ORDER_FILE = CITY + '/' + ORDER_NAME + '.html'
        # Save the page.
        saveFile(ORDER_TEXT, ORDER_FILE)
        print(ORDER_FILE)

    yuli0927 2019年08月09日 标签: Python分类: Python

  • Python抓取订单页面

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import requests
    import re
    
    from lxml import etree
    import html
    
    # Prefix of the index/log file names and name of the output directory.
    CITY = 'city'
    # Credentials handed to getSession.
    # NOTE(review): these look like placeholder values - confirm before running.
    USERNAME = 'name'
    PASSWORD = 'password'
    
    # Bounds handed to range() for the list pages to fetch;
    # PAGE_END itself is therefore not fetched.
    PAGE_START = 1
    PAGE_END = 1100
    
    
    # Logged-in session
    def getSession(username, password):
        """Post the login form and return the session that sent it.

        Args:
            username: Value sent as the ``ID`` form field.
            password: Value sent as the ``PWD`` form field.

        Returns:
            The ``requests.Session`` used for the login request.
        """
        session = requests.Session()
        session.post(
            'http://www.test.com/index.php?ajax=1',
            data={
                "com": 'com_passport',
                "method": 'dologin',
                "ID": username,
                "PWD": password,
                "checkbox": 'on',
            },
            # Browser-like User-Agent header.
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            },
        )
        return session
    
    # List page URL
    def getPageUrl(num):
        """Return the URL of order list page ``num``.

        Args:
            num: Page number; it is converted to text and appended to the
                base URL.
        """
        return '{}{}'.format(
            'http://www.test.com/index.php?method=index&app=order&page=',
            str(num),
        )
        
    # Save a file
    def saveFile(text, name):
        """Write ``text`` to the file ``name``, replacing any existing content.

        Args:
            text: String to write.
            name: Path of the file to create or overwrite.
        """
        # The context manager closes the handle even when write() raises.
        with open(name, "w") as fo:
            fo.write(text)
    
    
    # Log record
    def addLog(type, text):
        """Append ``text`` as one line to the log file ``<CITY>_<type>.txt``.

        Args:
            type: Log category used in the file name. The name shadows the
                builtin but is kept so existing callers keep working.
            text: Message to append; a newline is added after it.
        """
        ERR_FILE = CITY + '_' + type + '.txt'
        # The context manager closes the handle even when write() raises.
        with open(ERR_FILE, "a") as ferror:
            ferror.write(text + "\n")
    
    ## Order details
    def getOrder(text):
        """Return the markup of every ``order-content`` div found in ``text``.

        Each ``<div class='order-content'>`` element is serialized, its HTML
        entities are unescaped, and a newline is appended after it.

        Args:
            text: HTML source of a list page.

        Returns:
            The concatenated div markup, or '' when no such div is present.
        """
        html_text = etree.HTML(text)
        contents = html_text.xpath("//div[@class='order-content']")
        # Iterate the elements directly and join once instead of indexing
        # and growing a string with +=.
        return "".join(
            html.unescape(etree.tostring(div).decode('utf-8')) + "\n"
            for div in contents
        )
    
    
    ##########################################################
    
    # City index file.
    # UTF-8 is given explicitly because the script that consumes this index
    # opens it with encoding='utf-8'; the platform default may differ.
    # The context manager makes sure the file is flushed and closed, which
    # the original never did.
    with open(CITY + '_index.html', "w", encoding='utf-8') as fcity:
        fcity.write("<link rel='stylesheet' type='text/css' href='style.css'>\n")

        # Log in
        SESSION = getSession(USERNAME, PASSWORD)

        # Fetch every list page.
        # NOTE(review): range() excludes PAGE_END - confirm that is intended.
        for num in range(PAGE_START, PAGE_END):
            page = str(num)
            PAGE_FILE = CITY + '/page_' + page.zfill(5) + '.html'
            PAGE_URL = getPageUrl(page)
            PAGE_RES = SESSION.get(PAGE_URL)
            # Log every attempted URL.
            addLog('log', PAGE_URL)
            print(PAGE_URL)
            print(PAGE_RES.status_code)
            if PAGE_RES.status_code != 200:
                addLog('error', '获取第'+ page +'页失败')
                continue
            addLog('success', '获取第'+ page +'页成功')
            # Save the raw page.
            PAGE_TEXT = PAGE_RES.text
            saveFile(PAGE_TEXT, PAGE_FILE)
            # Append the order markup to the index.
            ORDER_TEXT = getOrder(PAGE_TEXT)
            fcity.write("\n第" + page + "页\n")
            fcity.write(ORDER_TEXT)
            if ORDER_TEXT == '':
                # The page loaded but contained no order markup.
                fcity.write("获取内容失败")
                addLog('error', '获取第'+ page +'页详情失败')

    yuli0927 2019年08月09日 标签: Python分类: Python

1 2 3 4 5 6 7 8 ... Next »... Last »