[Python] Telegram Bot + Web scraper (beautiful soup) 개발 중
Telegram auto uploader + basic crawler ..
ksg와 shipping news를 크롤링 하는데,
이후에 이걸 표준 모듈화해서 업로드 하려고 합니다.
token 과 chat_id 를 삭제하고 업로드했습니다.
import schedule
import time
import telegram
import bs4
from urllib.request import urlopen

# NOTE: credentials are intentionally placeholders -- supply real values
# before running (the author stripped them prior to upload).
token = "Your token here"
chat_id = "Your Chat Id here"

bot = telegram.Bot(token=token)
updates = bot.get_updates()

# Articles already pushed from shippingnewsnet. The Tag lists mirror the
# original script's state; the set of title strings gives O(1) duplicate
# checks instead of comparing whole bs4 Tags against a growing list.
title_tag = []
text_tag = []
date_tag = []
_seen_titles = set()

bot.sendMessage(chat_id, "Server Initialized...")


def job():
    """Crawl shippingnewsnet's S2N1 list page and push unseen articles.

    Sends each new article's title, lead paragraph, and byline date to
    Telegram, remembering titles across runs so repeats are skipped.
    """
    now = time.localtime()
    print("current time = ", str(now))
    url = ("https://www.shippingnewsnet.com/news/articleList.html"
           "?sc_sub_section_code=S2N1&view_type=sm")
    # Context manager closes the HTTP response (original leaked the socket
    # on every scheduled run).
    with urlopen(url) as html:
        bs_obj = bs4.BeautifulSoup(html, "html.parser")
    ul = bs_obj.find("ul", {"class": "type2"})
    if ul is None:
        # Layout changed or the request was blocked -- skip this run
        # instead of crashing the scheduler loop.
        return
    for li in ul.findAll("li"):
        title = li.find("h4", {"class": "titles"})
        lead = li.find("p", {"class": "lead line-6x2"})
        byline = li.find("span", {"class": "byline"})
        # Non-article <li> entries (ads, separators) lack these tags;
        # the original appended None and crashed on `.text`.
        if title is None or lead is None or byline is None:
            continue
        if title.text in _seen_titles:
            continue
        _seen_titles.add(title.text)
        title_tag.append(title)
        text_tag.append(lead)
        date_tag.append(byline)
        bot.sendMessage(chat_id, title.text)
        bot.sendMessage(chat_id, lead.text)
        # [10:26] presumably slices the timestamp out of the byline text --
        # TODO confirm against the live markup.
        bot.sendMessage(chat_id, byline.text[10:26])
    print('\n')


# Articles already pushed from KSG (same bookkeeping scheme as above).
title_tag2 = []
text_tag2 = []
date_tag2 = []  # kept for compatibility; KSG items carry no date tag here
_seen_titles2 = set()


def ksgcrawler():
    """Crawl KSG's LPG news list and push unseen articles to Telegram."""
    url = "https://www.ksg.co.kr/news/news_list.jsp?categoryCode=LPG"
    with urlopen(url) as html:
        bs_obj = bs4.BeautifulSoup(html, "html.parser")
    div = bs_obj.find("div", {"class": "news_wrap"})
    if div is None:
        # Layout changed or request blocked -- skip this run.
        return
    for li in div.findAll("li"):
        title = li.find("dt")
        summary = li.find("dd")
        if title is None or summary is None:
            continue
        if title.text in _seen_titles2:
            continue
        _seen_titles2.add(title.text)
        title_tag2.append(title)
        text_tag2.append(summary)
        bot.sendMessage(chat_id, title.text)
        bot.sendMessage(chat_id, summary.text)
    print('\n')


# Run both crawlers once a minute, forever.
schedule.every(1).minutes.do(job)
schedule.every(1).minutes.do(ksgcrawler)

while True:
    schedule.run_pending()
    time.sleep(1)
댓글
댓글 쓰기