Spaces:

ojs595
/

gen_predict

Sleeping

App Files Files Community

gen_predict / app.py

ojs595

Update app.py

974f1e7 verified 5 months ago

raw

history blame contribute delete

6.34 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import gradio as gr
	import pandas as pd
	import io
	from torch.utils.data import DataLoader, Dataset
	from torch.optim import AdamW
	from sklearn.model_selection import train_test_split

	# 모델과 토크나이저 로드
	MODEL_NAME = "beomi/kcbert-base"
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3) # 3개 클래스로 변경

	# 데이터셋 클래스 정의
	class CustomDataset(Dataset):
	def __init__(self, dataframe, tokenizer, max_len=128):
	self.tokenizer = tokenizer
	self.data = dataframe
	self.max_len = max_len

	def __len__(self):
	return len(self.data)

	def __getitem__(self, index):
	item = self.data.iloc[index]
	description = str(item['description'])
	label = item['label']

	encoding = self.tokenizer.encode_plus(
	description,
	add_special_tokens=True,
	max_length=self.max_len,
	return_token_type_ids=False,
	padding='max_length',
	truncation=True,
	return_attention_mask=True,
	return_tensors='pt',
	)

	return {
	'input_ids': encoding['input_ids'].flatten(),
	'attention_mask': encoding['attention_mask'].flatten(),
	'labels': torch.tensor(label, dtype=torch.long)
	}

	# 훈련 데이터 준비 및 모델 훈련
	def train_model():
	csv_data = """description,gender
	"그는 축구를 정말 좋아하고, 근육질의 몸매를 가졌다.",남자
	"그녀는 긴 머리를 가졌고, 분홍색 원피스를 입었다.",여자
	"짧은 머리에 정장을 입은 그는 회의에 참석했다.",남자
	"아름다운 목소리로 노래하는 그녀는 가수다.",여자
	"그의 취미는 자동차 정비와 컴퓨터 게임이다.",남자
	"그녀는 섬세한 손길로 아기 인형을 만들었다.",여자
	"군대에서 막 제대한 그는 씩씩해 보였다.",남자
	"그녀는 친구들과 수다 떠는 것을 좋아한다.",여자
	"강력한 리더십으로 팀을 이끄는 모습이 인상적이었다.",남자
	"자신이 직접 만든 쿠키를 주변에 나누어주곤 한다.",여자
	"그들은 책 읽기를 좋아하고 조용한 성격이다.",중성
	"키가 크고 체격이 좋으며 운동을 즐긴다.",중성
	"요리와 청소를 모두 잘하며 집안일을 도맡아 한다.",중성
	"컴퓨터 프로그래밍과 뜨개질을 모두 취미로 한다.",중성
	"차분한 성격으로 상담을 잘해주는 편이다.",중성
	"독서와 영화감상을 즐기는 문화 애호가이다.",중성
	"""

	data = pd.read_csv(io.StringIO(csv_data))
	# 3개 클래스로 라벨 변경: 남자=0, 여자=1, 중성=2
	data['label'] = data['gender'].apply(lambda x: 0 if x == '남자' else (1 if x == '여자' else 2))
	train_data, _ = train_test_split(data, test_size=0.2, random_state=42)

	train_dataset = CustomDataset(train_data, tokenizer)
	train_loader = DataLoader(train_dataset, batch_size=2)

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model.to(device)

	optimizer = AdamW(model.parameters(), lr=5e-5)

	print("모델 훈련 시작...")
	model.train()
	for epoch in range(3):
	for batch in train_loader:
	optimizer.zero_grad()
	input_ids = batch['input_ids'].to(device)
	attention_mask = batch['attention_mask'].to(device)
	labels = batch['labels'].to(device)

	outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
	loss = outputs.loss
	loss.backward()
	optimizer.step()
	print(f"Epoch {epoch + 1} 완료")

	print("모델 훈련 완료!")

	# 예측 함수
	def predict_gender(text):
	if not text.strip():
	return "텍스트를 입력해주세요."

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model.eval()

	encoding = tokenizer.encode_plus(
	text,
	add_special_tokens=True,
	max_length=128,
	return_token_type_ids=False,
	padding='max_length',
	truncation=True,
	return_attention_mask=True,
	return_tensors='pt',
	)

	input_ids = encoding['input_ids'].to(device)
	attention_mask = encoding['attention_mask'].to(device)

	with torch.no_grad():
	outputs = model(input_ids, attention_mask=attention_mask)
	probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
	prediction = torch.argmax(outputs.logits, dim=1).flatten().item()
	confidence = probabilities[0][prediction].item()

	# 3개 클래스 매핑: 0=남자, 1=여자, 2=중성
	gender_map = {0: "남자", 1: "여자", 2: "중성"}
	gender = gender_map[prediction]

	return f"예측 성별: {gender} (신뢰도: {confidence:.2%})"

	# 앱 시작 시 모델 훈련
	print("앱 초기화 중...")
	train_model()

	# Gradio 인터페이스 생성
	iface = gr.Interface(
	fn=predict_gender,
	inputs=gr.Textbox(
	lines=3,
	placeholder="성별을 예측할 텍스트를 입력하세요.\n예: '그는 축구를 좋아하고 근육질이다.'",
	label="텍스트 입력"
	),
	outputs=gr.Textbox(label="예측 결과"),
	title="🤖 AI 성별 예측기 (3분류)",
	description="입력된 텍스트를 바탕으로 성별을 예측합니다. (남자/여자/중성)",
	examples=[
	["그는 축구를 정말 좋아하고, 근육질의 몸매를 가졌다."],
	["그녀는 긴 머리를 가졌고, 분홍색 원피스를 입었다."],
	["짧은 머리에 정장을 입은 그는 회의에 참석했다."],
	["아름다운 목소리로 노래하는 그녀는 가수다."],
	["그들은 책 읽기를 좋아하고 조용한 성격이다."],
	["요리와 청소를 모두 잘하며 집안일을 도맡아 한다."]
	],
	theme=gr.themes.Soft(),
	# Google 인증을 위한 커스텀 HTML 헤드 추가
	head="""
	<meta name="google-site-verification" content="9owJnk1eK0CZKk6u6slBQwC6ts3e1GUAm_ohwPtE2BI" />
	"""
	)

	# 앱 실행
	if __name__ == "__main__":
	iface.launch()