(Google Colab) Sharing the source for downloading news article images, text mining a CSV, posting to a Facebook feed, and sending a Teams webhook.
import os
import re
import time
import facebook
import requests
import configparser
import urllib.request
import pandas as pd
from PIL import Image
from bs4 import BeautifulSoup
from datetime import datetime
from google.colab import drive
... (omitted)
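write_csv below calls a compare_seq helper that lives in the omitted part. As a rough sketch of what it has to do (the duplicate-check logic here is an assumption, not the original implementation), it only needs to report whether the newly scraped sequence number has already been saved:

# Hypothetical sketch of compare_seq: return 1 (duplicate) when the scraped
# sequence number is not newer than the last one stored in the CSV.
def compare_seq(csv_seq, new_seq):
    return 1 if new_seq <= csv_seq else 0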
def write_csv(df):
    savename = CSV_URI + 'platum.csv'
    tmp = savename.split('/')
    tmp2 = tmp[len(tmp) - 1]  # file name only, used for logging
    if os.path.exists(savename):
        print('Add data', tmp2)
        df_read = pd.read_csv(savename, header=None)
        last_row = df_read.tail(1)
        csv_seq = last_row.iloc[:, 0]  # sequence number of the last saved article
        result = compare_seq(int(csv_seq.values[0]), int(df['seq'].values[0]))
    else:
        print('Make file', tmp2)
        result = 0
    if result:
        print('Overlap contents!!!')
    else:
        # append the new row without header/index so the file keeps growing
        df.to_csv(savename, header=False, index=False, mode='a', encoding='utf-8-sig')
    return result
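Once write_csv reports a new article, it gets pushed out to Teams and Facebook, which the omitted part of the source handles. The two helpers below are only minimal sketches under assumptions: TEAMS_WEBHOOK_URL and FB_ACCESS_TOKEN are placeholder names (the originals are presumably read with configparser), and the Teams payload uses the plain-text JSON that an Incoming Webhook connector accepts.

# Minimal sketch (not the original code): notify a Teams channel through an
# Incoming Webhook. TEAMS_WEBHOOK_URL is a placeholder for the connector URL.
def send_teams(title, link):
    payload = {'text': title + '\n' + link}
    resp = requests.post(TEAMS_WEBHOOK_URL, json=payload)
    print('Teams webhook status:', resp.status_code)

# Minimal sketch: publish the same article to a Facebook feed with facebook-sdk.
# FB_ACCESS_TOKEN is a placeholder and needs publish permission on the target page.
def post_facebook(title, link):
    graph = facebook.GraphAPI(access_token=FB_ACCESS_TOKEN, version='3.1')
    graph.put_object(parent_object='me', connection_name='feed',
                     message=title, link=link)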
... (omitted)
url = 'https://platum.kr'
resp = requests.get(url)
soup = BeautifulSoup(resp.text, 'lxml')
scrappy(soup)
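The scrappy routine itself sits in the omitted part of the source. As an illustration of the image-download step mentioned in the title, a minimal sketch is shown below; the og:image meta tag and the download_image name are assumptions, not the original parsing logic.

# Minimal sketch of the image-download step: fetch an article page, read its
# og:image meta tag (an assumed selector) and save the file for later use.
def download_image(article_url, save_dir):
    html = requests.get(article_url).text
    meta = BeautifulSoup(html, 'lxml').find('meta', property='og:image')
    if meta is None:
        return None
    img_url = meta['content']
    filename = os.path.join(save_dir, img_url.split('/')[-1])
    urllib.request.urlretrieve(img_url, filename)
    Image.open(filename).verify()  # confirm the saved file is a valid image
    return filename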
(The full source is shared on the site below.)