From aea4fc1ad0d52075d413313c5fcaca8b9774f294 Mon Sep 17 00:00:00 2001 From: "Sergio G." Date: Sun, 27 Jul 2025 14:10:04 +0200 Subject: [PATCH] feat: add more translation languages --- README.md | 43 +++++++++++++++++++++++++++++++++------- app/summarizer.py | 50 +++++++++++++++++++++++++++++++++++++++++++++-- app/translator.py | 13 +++++++++--- app/webui.py | 23 ++++++++++++++-------- 4 files changed, 109 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 1ba3d3f..3327d70 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Easy Webpage Summarizer -A Python script designed to summarize webpages from specified URLs using the LangChain framework and the ChatOllama model. It leverages advanced language models to generate detailed summaries, making it an invaluable tool for quickly understanding the content of web-based documents. +A Python script designed to summarize webpages from specified URLs using the LangChain framework and the ChatOllama model. It leverages advanced language models to generate detailed summaries and translate them to multiple languages, making it an invaluable tool for quickly understanding the content of web-based documents. ## Requirements @@ -17,23 +17,47 @@ pip install -r requirements.txt ## Features - Summarization of webpages and youtube videos directly from URLs. -- Translates to Turkish language (other languages will be added soon!) -- Integration with LangChain and ChatOllama for state-of-the-art summarization. +- **Translation to multiple languages** with language selection +- Integration with LangChain and ChatOllama for state-of-the-art summarization and translation. - Command-line interface for easy use and integration into workflows. +- Web interface with language selection dropdown. 
## Usage +### Command Line Interface + To use the webpage summarizer, run the script from the command line, providing the URL of the document you wish to summarize: ```bash -python summarizer.py -u "http://example.com/document" +# Basic summarization only +python app/summarizer.py -u "http://example.com/document" + +# Summarize and translate to Turkish (default) +python app/summarizer.py -u "http://example.com/document" -t "Turkish" + +# Summarize and translate to Spanish +python app/summarizer.py -u "http://example.com/document" -t "Spanish" + +# Summarize and translate to German +python app/summarizer.py -u "http://example.com/document" -t "German" ``` Replace `http://example.com/document` with the actual URL of the document you want to summarize. +#### Available Languages + +The following languages are supported for translation: +- Turkish (default) +- French +- German +- Italian +- Portuguese +- Spanish +- English + ### Web UI -To use the webpage summarizer in you web browser, you can also try gradio app. +To use the webpage summarizer in your web browser, you can also try the gradio app: ```bash python app/webui.py @@ -41,6 +65,11 @@ python app/webui.py ![gradio](assets/gradio.png) +The web interface includes: +- URL input for summarization +- Language selection dropdown (appears after generating summary) +- Translate button to convert summary to selected language + ## Docker ```bash @@ -51,14 +80,14 @@ docker run -p 7860:7860 web_summarizer docker run -d --network='host' -p 7860:7860 web_summarizer ``` - ## Development To contribute to the development of this script, clone the repository, make your changes, and submit a pull request. We welcome contributions that improve the script's functionality or extend its capabilities. 
- [x] Summarize youtube videos - [x] Dockerize project -- [ ] Translate to different languages +- [x] Translate to different languages +- [x] Language selection for translations - [ ] Streaming text output on gradio - [ ] Serve on web diff --git a/app/summarizer.py b/app/summarizer.py index dd4a191..07167b9 100644 --- a/app/summarizer.py +++ b/app/summarizer.py @@ -9,11 +9,15 @@ def setup_argparse(): """Setup argparse to parse command line arguments.""" parser = argparse.ArgumentParser( - description="Summarize a document from a given URL." + description="Summarize a document from a given URL and optionally translate it." ) parser.add_argument( "-u", "--url", required=True, help="URL of the document to summarize" ) + parser.add_argument( + "-t", "--translate", + help="Target language for translation (if specified, translation is enabled)" + ) return parser.parse_args() @@ -50,12 +54,54 @@ def setup_summarization_chain(): return llm_chain +def setup_translation_chain(target_language="Turkish"): + """Setup the translation chain with a prompt template and ChatOllama.""" + prompt_template = PromptTemplate( + template="""Translate the following text into {target_language}. Provide only the translation without any quotes, headers, or additional text. 
The output should be clean and direct: + +{text} + +TRANSLATION:""", + input_variables=["text", "target_language"], + ) + + llm = ChatOllama(model="llama3:instruct", base_url="http://127.0.0.1:11434") + llm_chain = LLMChain(llm=llm, prompt=prompt_template) + return llm_chain + + +def translate_text(text, target_language="Spanish"): + """Translate text to the specified target language.""" + llm_chain = setup_translation_chain(target_language) + result = llm_chain.run({"text": text, "target_language": target_language}) + return result + + def main(): args = setup_argparse() docs = load_document(args.url) + # Generate summary llm_chain = setup_summarization_chain() - result = llm_chain.run(docs) + summary = llm_chain.run(docs) + + print("=" * 60) + print("SUMMARY") + print("=" * 60) + print(summary) + print() + + # Translate if language is specified + if args.translate: + print("=" * 60) + print(f"TRANSLATION TO {args.translate.upper()}") + print("=" * 60) + try: + translation = translate_text(summary, args.translate) + print(translation) + except Exception as e: + print(f"Translation failed: {e}") + print("Make sure Ollama is running with the llama3:instruct model.") if __name__ == "__main__": diff --git a/app/translator.py b/app/translator.py index a3fcd5f..fdd91a1 100644 --- a/app/translator.py +++ b/app/translator.py @@ -3,17 +3,24 @@ from langchain_community.chat_models import ChatOllama -def setup_translator_chain(): +def setup_translator_chain(target_language="Turkish"): """Setup the translation chain with a prompt template and ChatOllama.""" prompt_template = PromptTemplate( - template="""As a professional translator, provide a detailed and comprehensive translation of the provided text into turkish, ensuring that the translation is accurate, coherent, and faithful to the original text. 
+ template="""As a professional translator, provide a detailed and comprehensive translation of the provided text into {target_language}, ensuring that the translation is accurate, coherent, and faithful to the original text. "{text}" DETAILED TRANSLATION:""", - input_variables=["text"], + input_variables=["text", "target_language"], ) llm = ChatOllama(model="llama3:instruct", base_url="http://127.0.0.1:11434") llm_chain = LLMChain(llm=llm, prompt=prompt_template) return llm_chain + + +def translate_text(text, target_language="Turkish"): + """Translate text to the specified target language.""" + llm_chain = setup_translator_chain(target_language) + result = llm_chain.run({"text": text, "target_language": target_language}) + return result diff --git a/app/webui.py b/app/webui.py index a48c76a..f4215bb 100644 --- a/app/webui.py +++ b/app/webui.py @@ -1,7 +1,7 @@ import gradio as gr from summarizer import load_document, setup_summarization_chain -from translator import setup_translator_chain +from translator import translate_text from yt_summarizer import check_link, summarize_video @@ -13,12 +13,11 @@ def summarize(url): llm_chain = setup_summarization_chain() result = llm_chain.run(docs) - return [result, gr.Button("🇹🇷 Translate ", visible=True)] + return [result, gr.Button("Translate ", visible=True), gr.Dropdown(visible=True)] -def translate(text): - llm_chain = setup_translator_chain() - result = llm_chain.run(text) +def translate(text, target_language): + result = translate_text(text, target_language) return result @@ -35,7 +34,15 @@ def translate(text): btn_generate = gr.Button("Generate") summary = gr.Markdown(label="Summary") - btn_translate = gr.Button(visible=False) + + with gr.Row(): + btn_translate = gr.Button(visible=False) + language_dropdown = gr.Dropdown( + choices=["Spanish", "French", "German", "Italian", "Portuguese", "Turkish", "English"], + value="Turkish", + label="Target Language", + visible=False + ) gr.Examples( [ @@ -54,7 +61,7 @@ def 
translate(text): Repo: github.com/mertcobanov/easy-web-summarizer ```""" ) - btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate]) - btn_translate.click(translate, inputs=[summary], outputs=[summary]) + btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate, language_dropdown]) + btn_translate.click(translate, inputs=[summary, language_dropdown], outputs=[summary]) demo.launch(server_name="0.0.0.0")