Instagram Crawling

Python Crawling Practice

  • Example of crawling Instagram with the Selenium module in Python:
from selenium import webdriver   # Selenium 3.x style API (find_element_by_xpath)
import time
from bs4 import BeautifulSoup
import urllib.request

driver = webdriver.Chrome('./chromedriver_win32/chromedriver.exe')
url = 'https://www.instagram.com/'
driver.get(url)
time.sleep(4)

ins_id = '[Enter your Instagram ID (email)]'  # or read it with input() here!
pw = '[Enter your Instagram password]'

# Log in: fill the ID and password fields, then click the login button
driver.find_element_by_xpath('[copy xpath here]').send_keys(ins_id)
driver.find_element_by_xpath('[copy xpath here]').send_keys(pw)
driver.find_element_by_xpath('[copy xpath here]').click()
time.sleep(3)

# Dismiss the "Save your login info?" pop-up
driver.find_element_by_xpath('[copy xpath here]').click()
time.sleep(3)

# Dismiss the notifications pop-up and open the search box
driver.find_element_by_xpath('[copy xpath here]').click()
time.sleep(4)

key_word = input('Enter a keyword: ')
driver.find_element_by_xpath('[copy xpath here]').send_keys(key_word)
time.sleep(4)

# Open the first search result
driver.find_element_by_xpath('[copy xpath here]').click()

for i in range(50):
    if i == 0:
        # Open the first post
        driver.find_element_by_xpath('[copy xpath here]').click()

        html = BeautifulSoup(driver.page_source, 'html.parser')

        tag = [tag.text.strip('#') for tag in html.select('a.xil3i')]
        comment = [comment.text for comment in html.select('div > div.C4VMK > span')]
        like = [l.text for l in html.select('section.EDfFK.ygqzn > div > div > a > span')]

        image = driver.find_element_by_css_selector('[copy selector here related to the image]')
        image_url = image.get_attribute('src')
        urllib.request.urlretrieve(image_url, './test_image.jpg')
        time.sleep(5)
        print('Crawling started')
        # Move to the next post
        driver.find_element_by_xpath('[copy xpath here]').click()

    else:
        html = BeautifulSoup(driver.page_source, 'html.parser')

        total_tag = [tag.text.strip('#') for tag in html.select('a.xil3i')]
        extra_comment = [comment.text for comment in html.select('div > div.C4VMK > span')]
        total_like = [l.text for l in html.select('section.EDfFK.ygqzn > div > div > a > span')]

        try:
            image = driver.find_element_by_css_selector('[copy selector here related to the image]')
            image_url = image.get_attribute('src')
            urllib.request.urlretrieve(image_url, f'./test_image{i}.jpg')

        except Exception as error:
            # Fallback: copy an alternative selector here (e.g., for video or carousel posts)
            image = driver.find_element_by_css_selector('[copy selector here related to the image]')
            image_url = image.get_attribute('src')
            urllib.request.urlretrieve(image_url, f'./test_image{i}.jpg')

        # Move to the next post
        driver.find_element_by_xpath('[copy xpath here]').click()
        time.sleep(10)
        print(f'Crawling post {1 + i}')
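  • Note: the find_element_by_xpath / find_element_by_css_selector methods above belong to Selenium 3.x and were removed in Selenium 4. If you run a newer Selenium, the same steps are written with By locators. Below is a minimal sketch only; the chromedriver path and every XPath string are placeholders you still have to copy from the page yourself.

# Minimal sketch of the login steps with the Selenium 4 API (By locators).
# Assumption: chromedriver path and XPath strings are placeholders to fill in.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time

driver = webdriver.Chrome(service=Service('./chromedriver_win32/chromedriver.exe'))
driver.get('https://www.instagram.com/')
time.sleep(4)

driver.find_element(By.XPATH, '[copy xpath here]').send_keys('[your Instagram ID]')
driver.find_element(By.XPATH, '[copy xpath here]').send_keys('[your Instagram password]')
driver.find_element(By.XPATH, '[copy xpath here]').click()

# CSS selectors work the same way:
# image = driver.find_element(By.CSS_SELECTOR, '[copy selector here related to the image]')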
  • Do not use this code for unauthorized crawling or copying of content (this is against Instagram's terms of use!)