Spaces:
Paused
Paused
| from logging import getLogger | |
| from pathlib import Path | |
| import joblib | |
| import pandas as pd | |
| import streamlit as st | |
| from top2vec import Top2Vec | |
# Module-level logger named after this module.
logger = getLogger(__name__)

# Directory two levels above this file; the code below reads `models/` and
# `data/` from it. `resolve()` makes the path absolute first, so a relative
# `__file__` (e.g. "app.py") cannot make `parents[1]` raise IndexError or
# point at a directory that depends on the current working directory.
proj_dir = Path(__file__).resolve().parents[1]
def initialization():
    """Populate ``st.session_state`` with everything the app needs.

    Each group of entries is loaded only when its guard key is missing from
    the session state, so calling this again in the same session does not
    reload what is already there.

    Session-state entries written:
        model, umap_model    -- guarded by ``'model'``
        data, selected_data,
        all_topics           -- guarded by ``'data'``
        topics,
        topic_str_to_word    -- guarded by ``'topics'``
        selected_points      -- guarded by ``'selected_points'``
    """
    # NOTE(review): the original hard-coded 20 both for the hierarchical
    # topic reduction and for the number of rows used to build the
    # topic-id -> word mapping; presumably the same quantity -- confirm.
    num_topics = 20
    state = st.session_state

    with st.spinner("Loading app..."):
        if 'model' not in state:
            logger.info("loading model...")
            # Anchor the model path at proj_dir like every other artifact,
            # so loading does not depend on the current working directory.
            model = Top2Vec.load(str(proj_dir / 'models' / 'model.pkl'))
            # Private Top2Vec hook kept from the original code; presumably
            # it prepares the embedding model after unpickling -- confirm.
            model._check_model_status()
            model.hierarchical_topic_reduction(num_topics=num_topics)
            umap_model = joblib.load(proj_dir / 'models' / 'umap.sav')

            # Publish both objects only after both loads succeeded: a
            # failure above must not leave 'model' set without
            # 'umap_model', because later runs would skip this branch.
            state.model = model
            state.umap_model = umap_model

        if 'data' not in state:
            logger.info("loading data...")
            data = pd.read_csv(proj_dir / 'data' / 'data.csv')
            # Topic ids are stored as zero-padded two-character strings.
            data['topic_id'] = data['topic_id'].apply(lambda x: f'{x:02d}')
            state.data = data
            state.selected_data = data
            state.all_topics = list(data.topic_id.unique())

        if 'topics' not in state:
            logger.info("loading topics...")
            topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
            topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
            state.topics = topics

            # Map topic id -> value of the 'topic_0' column for the first
            # `num_topics` rows. Using head()/zip instead of positional
            # lookups avoids a KeyError when the file has fewer rows.
            leading = topics.head(num_topics)
            state.topic_str_to_word = dict(
                zip(leading['topic_id'], leading['topic_0'])
            )

        if 'selected_points' not in state:
            state.selected_points = []