import re

import pandas as pd
import requests
def extract_japan_cities(text):
    # Use a regular expression to extract city names that end in " - Japan"
    pattern = r'(\b\w+\s*\w*\b) - Japan'
    cities = re.findall(pattern, text)
    # Deduplicate and sort the city names, then join them into a comma-separated string
    unique_cities = sorted(set(cities))
    return ', '.join(unique_cities)
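# Example (illustrative, not from the original file): given a flattened location
# string, the helper returns the sorted, de-duplicated Japanese cities, e.g.
#   extract_japan_cities("Tokyo - Japan, Boston - United States, Osaka - Japan")
#   -> "Osaka, Tokyo"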
def fetch_clinical_trials(cancer_name):
    # Query expression: the cancer name plus an Essie filter for studies
    # recruiting at locations in Japan
    search_expr = f"{cancer_name} SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"
    # Base URL for the ClinicalTrials.gov v2 API
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }
    # Collect one dictionary per study
    data_list = []
    # Loop until the response carries no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join(f"{k}={v}" for k, v in params.items()))
        # Send a GET request to the API
        response = requests.get(base_url, params=params)
        if response.status_code == 200:
            data = response.json()  # Parse the JSON response
            studies = data.get('studies', [])  # Extract the list of studies
            for study in studies:
                # Access nested keys defensively: any module may be absent
                protocol = study.get('protocolSection', {})
                nctId = protocol.get('identificationModule', {}).get('nctId', 'Unknown')
                title = protocol.get('identificationModule', {}).get('briefTitle', 'no title')
                conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))
                summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')
                # Flatten the location list into "City - Country" pairs
                locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join(f"{loc.get('city', 'No City')} - {loc.get('country', 'No Country')}" for loc in locations_list) if locations_list else "No locations listed"
                japanese_locations = extract_japan_cities(locations)
                # Extract the primary completion date and eligibility criteria
                primaryCompletionDate = protocol.get('statusModule', {}).get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')
                eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')
                # Append the extracted fields as one row
                data_list.append({
                    "NCTID": nctId,
                    "Title": title,
                    "Primary Completion Date": primaryCompletionDate,
                    "Cancer": conditions,
                    "Summary": summary,
                    "Japanese Locations": japanese_locations,
                    "Eligibility Criteria": eligibilityCriteria
                })
            # Follow the nextPageToken if one is present; otherwise stop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Request the next page
            else:
                break
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break
    # Return the collected studies as a DataFrame
    return pd.DataFrame(data_list)
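# fetch_clinical_trials_jp below runs the same query and pagination loop as
# fetch_clinical_trials; the only difference is that the returned DataFrame
# uses Japanese column labels for display.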
def fetch_clinical_trials_jp(cancer_name):
    # Query expression: the cancer name plus an Essie filter for studies
    # recruiting at locations in Japan
    search_expr = f"{cancer_name} SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"
    # Base URL for the ClinicalTrials.gov v2 API
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }
    # Collect one dictionary per study
    data_list = []
    # Loop until the response carries no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join(f"{k}={v}" for k, v in params.items()))
        # Send a GET request to the API
        response = requests.get(base_url, params=params)
        if response.status_code == 200:
            data = response.json()  # Parse the JSON response
            studies = data.get('studies', [])  # Extract the list of studies
            for study in studies:
                # Access nested keys defensively: any module may be absent
                protocol = study.get('protocolSection', {})
                nctId = protocol.get('identificationModule', {}).get('nctId', 'Unknown')
                title = protocol.get('identificationModule', {}).get('briefTitle', 'no title')
                conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))
                summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')
                # Flatten the location list into "City - Country" pairs
                locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join(f"{loc.get('city', 'No City')} - {loc.get('country', 'No Country')}" for loc in locations_list) if locations_list else "No locations listed"
                japanese_locations = extract_japan_cities(locations)
                # Extract the eligibility criteria
                eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')
                # Append the extracted fields as one row, using Japanese column labels
                data_list.append({
                    "NCTID": nctId,
                    "タイトル": title,
                    "対象となる癌": conditions,
                    "サマリー": summary,
                    "場所": japanese_locations,
                    "クライテリア": eligibilityCriteria
                })
            # Follow the nextPageToken if one is present; otherwise stop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Request the next page
            else:
                break
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break
    # Return the collected studies as a DataFrame
    return pd.DataFrame(data_list)
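# Minimal usage sketch (not part of the original file): "lung cancer" is an
# arbitrary example query, and live network access to clinicaltrials.gov is assumed.
if __name__ == "__main__":
    df = fetch_clinical_trials("lung cancer")
    print(df.head())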