Spaces:

fffiloni
/

Image-to-Story

Running on Zero

fffiloni committed on Aug 4, 2023

Commit

7175dd2

1 Parent(s): 3e63247

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda")
 def infer(image_input):
     #img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
     raw_image = Image.open(image_input).convert('RGB')
@@ -19,7 +23,26 @@ def infer(image_input):
     caption = processor.decode(out[0], skip_special_tokens=True)
     print(caption)
-    return caption
 css="""
 #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
@@ -39,7 +62,8 @@ with gr.Blocks(css=css) as demo:
         )
         image_in = gr.Image(label="Image input", type="filepath")
         submit_btn = gr.Button('Sumbit')
-        story = gr.Textbox(label="Generated Story")
-    submit_btn.click(fn=infer, inputs=[image_in], outputs=[story])
 demo.queue().launch()

 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda")
+hf_token = os.environ.get('HF_TOKEN')
+from gradio_client import Client
+client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 def infer(image_input):
     #img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
     raw_image = Image.open(image_input).convert('RGB')
     caption = processor.decode(out[0], skip_special_tokens=True)
     print(caption)
+    llama_q = f"""
+    I'll give you a simple image caption, from i want you to provide a story that would fit well with the image.
+    Here's the music description :
+    {caption}
+    """
+    result = client.predict(
+    				llama_q,	# str in 'Message' Textbox component
+    				api_name="/predict"
+    )
+    print(f"Llama2 result: {result}")
+    return caption, result
 css="""
 #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
         )
         image_in = gr.Image(label="Image input", type="filepath")
         submit_btn = gr.Button('Sumbit')
+        caption = gr.Textbox(label="Generated Caption")
+        story = gr.Textbox(label="generated Story")
+    submit_btn.click(fn=infer, inputs=[image_in], outputs=[caption, story])
 demo.queue().launch()