크롤링

(3) 인스타그램 크롤링

빠스무 2023. 7. 9. 16:48
728x90
# 인스타그램에 접속해서 아이디와 비밀번호를 입력한 후 로그인한다.
 
from selenium import webdriver
import time
import chromedriver_autoinstaller
from selenium.webdriver.common.by import By
 
# Start a Chrome session and set a 3-second implicit wait for element lookups.
driver = webdriver.Chrome()
driver.implicitly_wait(3)

# `url` was never assigned in the original script, so driver.get(url) raised
# NameError.
# NOTE(review): the Instagram home page is assumed here because the XPaths
# below look like its login form -- confirm against the live page.
url = 'https://www.instagram.com/'
driver.get(url)

# Placeholder credentials: replace both strings with a real account before
# running. In the original these were bare identifiers (NameError at runtime),
# and the password placeholder was misspelled.
# The variable name `id` shadows the builtin; it is kept so that any code
# relying on the module-level name keeps working.
id = '아이디'
pw = '비밀번호'

# Locate the two inputs of the login form by absolute XPath.
input_id = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[1]/div/label/input')
input_pw = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[2]/div/label/input')

input_id.send_keys(id)
input_pw.send_keys(pw)

# Click the form's button, then pause 5 seconds before the script continues.
driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[4]/button/div').click()
time.sleep(5)

2. 해시태그 검색

# Open the Instagram tag page for the hashtag '술스타그램'.
hashtag = '술스타그램'
# Build the address from `hashtag`. The original assigned `hashtag` but never
# used it and called driver.get(url) without ever setting `url` for this page.
url = f'https://www.instagram.com/explore/tags/{hashtag}/'
driver.get(url)
time.sleep(5)

3. 스크롤 내리기

# Scroll to the bottom of the page five times, sleeping 5 seconds after each
# scroll before issuing the next one.
for _ in range(5):
    driver.execute_script(
        'window.scrollTo(0,document.body.scrollHeight)'
    )
    time.sleep(5)

4. 원하는 사진 클릭하기

# Click one photo link, addressed by an absolute XPath, then wait 3 seconds.
# NOTE(review): div[10]/div[3] presumably selects row 10, column 3 of the
# result grid -- confirm against the page layout.
xpath = (
    '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/div[1]/div[2]/section/main/article/div[2]/div/div[10]/div[3]/a'
)
driver.find_element(By.XPATH, xpath).click()
time.sleep(3)

5. 좋아요 클릭, 댓글남기기

# Absolute XPaths of the four controls used below (like button, comment box,
# the element clicked to submit the comment, and the "next" button), plus the
# comment text that gets typed.
like_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[1]/span[1]/button'
comment = '맛있겠다'
comment_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
send_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]'
next_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button'

# 1) Click the like button.
driver.find_element(By.XPATH, like_xpath).click()
time.sleep(3)

# 2) Click the comment box, then look it up again and type the comment.
driver.find_element(By.XPATH, comment_xpath).click()
driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
time.sleep(3)

# 3) Click the element next to the comment box to submit.
driver.find_element(By.XPATH, send_xpath).click()
time.sleep(3)

# 4) Click the "next" button.
driver.find_element(By.XPATH, next_xpath).click()
time.sleep(3)

6. 함수로 리팩토링

# Log in
def login(id, pw):
    """Fill in the login form on the current page and click its button.

    Uses the module-level ``driver``. The function does not navigate; it looks
    the form elements up on whatever page ``driver`` currently has loaded.

    Args:
        id: Text typed into the first form input. The name shadows the
            builtin ``id`` but is part of the existing signature.
        pw: Text typed into the second form input.
    """
    # All three elements live under the same form container.
    form = '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div'

    id_field = driver.find_element(By.XPATH, f'{form}/div[1]/div/label/input')
    pw_field = driver.find_element(By.XPATH, f'{form}/div[2]/div/label/input')
    id_field.send_keys(id)
    pw_field.send_keys(pw)

    submit_button = driver.find_element(By.XPATH, f'{form}/div[3]/button')
    submit_button.click()
# Hashtag search
def search(hashtag, scroll_times):
    """Open the Instagram tag page for *hashtag* and scroll it down.

    Uses the module-level ``driver``.

    Args:
        hashtag: Tag text without the leading ``#``; it is inserted into the
            tag-page URL as-is.
        scroll_times: How many times to scroll to the bottom of the page.
            Each scroll is followed by a 5-second sleep.
    """
    # The original ignored `hashtag` and loaded a global `url` that this file
    # never defines; build the tag-page address from the argument instead.
    tag_url = f'https://www.instagram.com/explore/tags/{hashtag}/'
    driver.get(tag_url)
    time.sleep(5)
    for _ in range(scroll_times):
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        time.sleep(5)
# Like and comment (which photo to pick by index, comment text, repeat count)
def like_and_comment(nth, comment, repeat=1):
    """Open the nth photo, then like and comment on ``repeat`` posts in a row.

    Uses the module-level ``driver``. The photo is addressed in a grid that is
    three columns wide: ``nth`` is converted to a 1-based row and column.

    Args:
        nth: 1-based index of the photo to open.
        comment: Text typed into the comment box of every visited post.
        repeat: Number of posts to like and comment on. After each post except
            the last one, the "next" button is clicked.
    """
    # Convert the 1-based index into 1-based (row, column) for a 3-wide grid.
    row_offset, col_offset = divmod(nth - 1, 3)
    row = row_offset + 1
    col = col_offset + 1
    thumbnail_xpath = f'/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/article/div[2]/div/div[{row}]/div[{col}]/a/div'
    driver.find_element(By.XPATH, thumbnail_xpath).click()
    time.sleep(3)

    # XPaths of the controls in the opened post; they are the same on every pass.
    like_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[1]/span[1]/button'
    comment_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
    send_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]/div'
    next_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button'

    for visited in range(1, repeat + 1):
        # Like the post.
        driver.find_element(By.XPATH, like_xpath).click()
        time.sleep(2)

        # Click the comment box, look it up again, and type the comment.
        driver.find_element(By.XPATH, comment_xpath).click()
        driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
        time.sleep(3)

        # Submit the comment.
        driver.find_element(By.XPATH, send_xpath).click()
        time.sleep(3)

        # Nothing to advance to after the final post.
        if visited == repeat:
            break
        driver.find_element(By.XPATH, next_xpath).click()
        time.sleep(3)
# Start a new Chrome session and rebind `driver` to it. Any session previously
# held in `driver` is not closed here.
driver = webdriver.Chrome()
driver.implicitly_wait(3)

# `url` is never defined for this page in the original script.
# NOTE(review): Naver Map is assumed, based on the `search_naver` name and the
# search-input-box element in the XPaths below -- confirm the intended address.
url = 'https://map.naver.com/'
driver.get(url)
search_naver = '/html/body/app/layout/div[3]/div[2]/shrinkable-layout/div/app-base/search-input-box/div/div[1]/div/input'

# Click the search input, look it up again, and type the query.
# (`comment` holds the search text here, not a post comment.)
comment = '역삼 일본라멘'
driver.find_element(By.XPATH, search_naver).click()
driver.find_element(By.XPATH, search_naver).send_keys(comment)

# Click the button next to the search input. The variable name `clikc` (sic)
# is kept unchanged in case other code refers to it.
clikc = '/html/body/app/layout/div[3]/div[2]/shrinkable-layout/div/app-base/search-input-box/div/div[1]/button'
driver.find_element(By.XPATH, clikc).click()