웹 | 셀레늄(Selenium)을 이용한 로그인과 데이터 크롤링 예제 (윈도우 10)
페이지 정보
작성자 김영준 작성일20-12-22 14:01 조회949회 댓글0건본문
1. 파이썬 설치 윈도우
2. 패키지 설치
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
pip install openpyxl
pip install numpy
pip install selenium
pip install requests
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
import openpyxl as excel
import time
import numpy as np
# Start a Chrome session driven by the chromedriver.exe binary and open the site.
# NOTE(review): the `executable_path` keyword is kept as in the original; newer
# Selenium releases expect a Service object instead - confirm the installed version.
driver = wd.Chrome(executable_path="chromedriver.exe")
session_id = driver.session_id
url = "사이트url"
driver.get(url)

# Navigate to the login page through the site's own JavaScript (placeholder script).
driver.execute_script("로그인페이지 이동")

# Fill each field immediately after locating it. Locating both fields first
# and typing afterwards would send both values to the password field, because
# `elem` would already have been rebound to it.
elem = driver.find_element(By.ID, "아이디")
elem.send_keys("아이디값")
elem = driver.find_element(By.ID, "비번")
elem.send_keys("비번값")

# Submit the login form through the site's own JavaScript (placeholder script).
driver.execute_script('로그인 처리하는 js')
# Create the output workbook and write the header labels into columns A-N
# of the row indexed by `num` (row 1).
wb = excel.Workbook()
sheet = wb.active
num = 1

header_labels = (
    '값1', '값2', '값3', '값4', '값5', '값6', '값7',
    '값8', '값9', '값10', '값11', '값12', '값13', '값14',
)
# Pair each column letter with its label instead of spelling out every cell.
for column_letter, label in zip('ABCDEFGHIJKLMN', header_labels):
    sheet[column_letter + str(num)] = label
# Column letter -> id of the detail-view element whose text fills that column.
# Every id here is the placeholder used by the original script; substitute the
# real element ids of the target site.
detail_field_ids = {
    'A': "id1",
    'B': "id1",
    'E': "id1",
    'F': "id1",
    'G': "id1",
    'H': "id1",
    'I': "id1",
    'J': "id1",
    'K': "id1",
    'L': "id1",
    'M': "id1",
    'N': "id1",
}

try:
    for i in range(1, 66):
        # Move the listing to the wanted page via the site's own script (placeholder).
        driver.execute_script('페이징처리하는값')
        roomslist = driver.find_element(By.ID, '리스트아이디')
        roomslist_li = roomslist.find_elements(By.TAG_NAME, "li")
        # NOTE(review): these <li> handles are collected before any navigation;
        # if history.back() re-renders the list they may become stale - confirm
        # against the target site.
        for val in roomslist_li:
            num += 1  # row 1 holds the header row, so data rows start at 2
            val.click()
            time.sleep(1)  # fixed pause before reading the detail view

            main = driver.find_elements(By.CLASS_NAME, "클레스이름")
            # Each of these columns joins two consecutive elements, so both
            # indices must exist before they are read.
            if len(main) > 1:
                sheet['C' + str(num)] = main[0].text + main[1].text
            if len(main) > 3:
                sheet['D' + str(num)] = main[2].text + main[3].text

            for column_letter, element_id in detail_field_ids.items():
                sheet[column_letter + str(num)] = driver.find_element(By.ID, element_id).text

            # Return to the listing before handling the next item.
            driver.execute_script('window.history.back()')
            time.sleep(1)
        # Test-only early exit; remove this break for the real run.
        # NOTE(review): the source's indentation was lost, so placing the break
        # at the page-loop level is an assumption - confirm the intended level.
        break
    wb.save('info20201207.xlsx')
finally:
    # Always close the browser session, even when scraping fails part-way.
    driver.quit()