# Comment-filtering Streamlit app (Hugging Face Space).
| import streamlit as st | |
| import tensorflow as tf | |
| import numpy as np | |
| import pandas as pd | |
| import json | |
| from transformers import * | |
| from tqdm import tqdm | |
| from tensorflow.python.client import device_lib | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import time | |
| import instaloader | |
| from instaloader import Post | |
# Local directory holding the fine-tuned sentiment-classification checkpoint.
PATH = './checkpoint-18750/'
# Fixed sequence length every model input is padded/truncated to.
SEQ_LEN = 128
# Tokenizer for the KLUE BERT base model the checkpoint was fine-tuned from.
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
def create_sentiment_bert():
    """Build a binary sentiment classifier on top of the fine-tuned BERT
    checkpoint stored at PATH.

    Returns a compiled tf.keras.Model taking [token ids, attention mask,
    segment ids] (each shaped (SEQ_LEN,)) and emitting a sigmoid score.
    """
    # Load the fine-tuned BERT backbone from the local checkpoint only.
    backbone = TFAutoModel.from_pretrained(PATH, local_files_only=True)

    # Three parallel inputs: token ids, attention mask, segment (type) ids.
    ids_in = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
    mask_in = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_masks')
    seg_in = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_segment')

    # Index 1 of the backbone output is the pooled [CLS] representation.
    pooled = backbone([ids_in, mask_in, seg_in])[1]

    # Single sigmoid unit: probability of the positive class.
    score = tf.keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )(pooled)

    classifier = tf.keras.Model([ids_in, mask_in, seg_in], score)
    classifier.compile(loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    return classifier
def sentence_convert_data(data):
    """Tokenize one sentence into the [tokens, masks, segments] arrays the
    BERT model expects, each shaped (1, SEQ_LEN)."""
    global tokenizer
    ids = tokenizer.encode(data, max_length=SEQ_LEN, truncation=True, padding='max_length')
    # Attention mask: 1 for real tokens, 0 for trailing [PAD] (id 0) slots.
    pad_count = ids.count(0)
    attention = [1] * (SEQ_LEN - pad_count) + [0] * pad_count
    # Single-sentence input, so every segment (token-type) id is 0.
    token_types = [0] * SEQ_LEN
    return [
        np.array([ids]),
        np.array([attention]),
        np.array([token_types]),
    ]
def evaluation_predict(sentence):
    """Classify one sentence with the global model; 0 = negative, 1 = positive."""
    features = sentence_convert_data(sentence)
    probability = np.ravel(sentiment_model.predict(features))
    # Threshold the sigmoid output at 0.5 by rounding to the nearest integer.
    return np.round(probability, 0).item()
def get_comments(news_url):
    """Fetch title, body text, and comments for a Naver news article or an
    Instagram post.

    Parameters
    ----------
    news_url : str
        URL containing 'naver' (news article) or 'insta' (Instagram post).

    Returns
    -------
    tuple[str, str, list[str]]
        (title, body, comments). The title is '' for Instagram posts.
        NOTE(review): a URL matching neither service falls through and
        returns None; callers that unpack the result will then raise.
    """
    if 'naver' in news_url:
        # The comment API identifies an article by oid (press id) and
        # aid (article id) — the last two path segments of the URL.
        segments = news_url.split("/")
        oid = segments[-2]
        aid = segments[-1]
        if len(aid) > 10:
            # aid is 10 digits; drop any trailing query string etc.
            aid = aid[:10]
        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
        params = {
            "ticket": "news",
            "templateId": "default_society",
            "pool": "cbox5",
            "lang": "ko",
            "country": "KR",
            "objectId": f"news{oid},{aid}",
            "pageSize": 100,
            "indexSize": 10,
            "page": 1,
            "sort": "FAVORITE"  # 'NEW' (latest) or 'FAVORITE' (most upvoted)
        }
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": news_url
        }
        # The endpoint answers with a JSONP payload; strip the callback
        # wrapper before parsing it as JSON.
        response = requests.get(api_url, params=params, headers=headers)
        content = response.text.replace("_callback(", "").replace(");", "")
        json_data = json.loads(content)
        # Scrape title and body from the article page itself.
        response = requests.get(news_url)
        article_soup = BeautifulSoup(response.text, "html.parser")
        title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
        if title is None:
            # Fallback selector for the older article page layout.
            title = article_soup.select_one("#content > div.end_ct > div > h2")
        article = article_soup.select_one("#dic_area")
        if article is None:
            article = article_soup.select_one("#articeBody")
        return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
    elif 'insta' in news_url:
        # The 11-character path segment is the post's shortcode.
        shortcode = ''
        for segment in news_url.split('/'):
            if len(segment) == 11:
                shortcode = segment
        loader = instaloader.Instaloader()
        post = Post.from_shortcode(loader.context, shortcode)
        try:
            comments = [c.text for c in post.get_comments()]
        except Exception:
            # Anonymous access to comments may be rejected by Instagram.
            comments = ['๋ก๊ทธ์ธ์ด ํ์ํฉ๋๋ค']
        return '', post.caption, comments
def processing_data(comments):
    """Extract each comment's text, dropping empty ones.

    Parameters
    ----------
    comments : list[dict]
        Raw comment objects; each carries its text under the 'contents' key.

    Returns
    -------
    list
        The truthy 'contents' values, in their original order.
    """
    return [entry['contents'] for entry in comments if entry['contents']]
def _render_results(title, content, comments):
    """Render the article and only the comments the model deems positive."""
    st.subheader("์ ๋ชฉ")
    st.write(title)
    st.subheader("๋ณธ๋ฌธ ๋ด์ฉ")
    st.write(content)
    st.subheader("๋๊ธ")
    for comment in comments:
        # 0 = negative, 1 = positive; show only the positives.
        if evaluation_predict(comment) == 1:
            st.write(comment)


def main():
    """Streamlit entry point: take a news/Instagram URL, scrape its comments,
    and display only those the BERT model classifies as positive."""
    global sentiment_model
    title = ''
    content = ''
    comments = []
    sentiment_model = create_sentiment_bert()
    st.title("๋๊ธ ํํฐ๋ง ์๋น์ค")
    # A non-empty "q" query parameter pre-fills the URL box and triggers an
    # immediate scrape (shared-link flow).
    prefill = st.query_params["q"] if "q" in st.query_params else ""
    if prefill:
        url = st.text_input("url์ ์ ๋ ฅํ์ธ์", value=prefill)
        title, content, comments = get_comments(url)
        if st.button("์คํฌ๋ฉ ์์") and url:
            # Re-scrape on demand (the URL may have been edited).
            title, content, comments = get_comments(url)
        # BUGFIX: the original rendered the results twice after a button
        # press (copy-pasted display block); render exactly once.
        _render_results(title, content, comments)
    else:
        url = st.text_input("url์ ์ ๋ ฅํ์ธ์")
        if st.button("์คํฌ๋ฉ ์์"):
            if url:
                title, content, comments = get_comments(url)
                _render_results(title, content, comments)
    return 0


if __name__ == "__main__":
    main()