Produces the correct HTML code and summary. Next objective is to push it to WordPress directly from this Gradio UI.

shahab00x 2024-02-19 23:44:01 +03:30
parent a7452b5d4b
commit 0728ec31e8
3 changed files with 27 additions and 13 deletions
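
The WordPress push named as the next objective is not part of this commit. A minimal sketch of what it could look like, assuming the target site exposes the standard WordPress REST API and an application password; WP_URL, WP_USER, WP_APP_PASSWORD and push_to_wordpress are placeholder names, not identifiers from this repo:

import requests

WP_URL = "https://example.com"            # placeholder site URL
WP_USER = "editor"                        # placeholder username
WP_APP_PASSWORD = "xxxx xxxx xxxx xxxx"   # placeholder application password

def push_to_wordpress(title, html_content, status="draft"):
    # Create a post via the core REST API; returns the public link of the new post.
    resp = requests.post(
        f"{WP_URL}/wp-json/wp/v2/posts",
        auth=(WP_USER, WP_APP_PASSWORD),
        json={"title": title, "content": html_content, "status": status},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["link"]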

requirements.txt (new file; displayed as binary, contents not shown)

(changed file; name not shown in this view)

@@ -6,6 +6,7 @@ from selectorlib import Extractor
 import re
 import pyperclip
 from openai import OpenAI
 import os
@@ -20,12 +21,12 @@ class AmazonScraper:
             'https': f'socks5h://{PROXY_HOST}:{PROXY_PORT}'
         }
-        HEADERS = {
+        self.HEADERS = {
             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
             'Accept-Language': 'en-US, en;q=0.5'
         }
-        self.HEADERS = {
+        HEADERS = {
             'authority': 'www.amazon.com',
             'pragma': 'no-cache',
             'cache-control': 'no-cache',
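
For context, a HEADERS dict and a socks5h proxies dict like the ones above are usually passed straight to requests; a sketch under that assumption (fetch_page is an illustrative name, not a method from this repo, and socks5h support needs the requests[socks] extra):

import requests

def fetch_page(url, headers, proxies):
    # Illustrative helper: fetch a page with the scraper's headers and SOCKS proxy.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=30)
    response.raise_for_status()
    return response.text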
@@ -100,7 +101,9 @@ class AmazonScraper:
         print(review_data)
         print(product_info)
         print(product_info['images'], type(product_info['images']))
         self.images = eval(product_info['images'])
         print(self.images)
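
The eval on product_info['images'] executes whatever string the extractor returns. A safer sketch of the same parsing step, assuming the field is a JSON or Python-literal dict of image URLs (the exact shape is not visible in this diff; parse_images is an illustrative name):

import ast
import json

def parse_images(raw):
    # Parse the scraped images field without eval.
    try:
        return json.loads(raw)           # JSON-style string
    except (TypeError, ValueError):
        return ast.literal_eval(raw)     # Python literal; raises instead of executing code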
@@ -135,6 +138,7 @@ class AIInterface:
             base_url='https://api.together.xyz',
         )
+        # client._proxies
         chat_completion = client.chat.completions.create(
             messages=[
                 {
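
The commented-out client._proxies line suggests routing the Together-hosted OpenAI client through the same SOCKS proxy. One way that could look, assuming the openai>=1.x client and httpx with SOCKS support (pip install 'httpx[socks]'); the key and proxy values are placeholders:

import httpx
from openai import OpenAI

PROXY_HOST, PROXY_PORT = "127.0.0.1", 1080   # placeholders mirroring the scraper's constants

client = OpenAI(
    api_key="TOGETHER_API_KEY",              # placeholder key
    base_url="https://api.together.xyz",
    # recent httpx versions take proxy=..., older ones proxies=...
    http_client=httpx.Client(proxy=f"socks5://{PROXY_HOST}:{PROXY_PORT}"),
)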

(changed file; name not shown in this view)

@@ -7,17 +7,17 @@ llms = ['meta-llama/Llama-2-70b-chat-hf', "mistralai/Mixtral-8x7B-Instruct-v0.1"
 scraper = AmazonScraper()
 aii = AIInterface()
-def replace_img_tag(html_text, img_dict):
+def replace_img_tag(url, html_text, img_dict):
     # Regular expression pattern to find <img> tags
     img_tag_pattern = r'<img src="(.*?)">'
     # Function to replace <img> tag
-    def replacer(match):
+    def replacer(match, url=url):
         img_url = match.group(1)
         srcset = ', '.join(f'{url} {w}w' for url, (w, h) in img_dict.items())
         replacement = f'''<div style="display: flex; justify-content: center;">
             <figure>
-                <a href="{img_url}"><img src="{img_url}" srcset="{srcset}" sizes="(max-width: 600px) 100vw, 600px" alt="Image from Amazon.com"></a>
+                <a href="{url}"><img src="{img_url}" srcset="{srcset}" sizes="(max-width: 600px) 100vw, 600px" alt="Image from Amazon.com"></a>
                 <figcaption style="text-align: center;">Image from Amazon.com</figcaption>
             </figure>
         </div>'''
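
The hunk above shows the pattern and the inner replacer but not the substitution call itself, which presumably goes through re.sub. A self-contained sketch of the same idea, not the repo's exact function; the img_dict shape (URL mapped to a (width, height) tuple) is inferred from the srcset line and the markup is trimmed:

import re

def swap_img_tags(url, html_text, img_dict):
    # Swap bare <img> tags for a linked figure block with a responsive srcset.
    img_tag_pattern = r'<img src="(.*?)">'
    srcset = ', '.join(f'{u} {w}w' for u, (w, h) in img_dict.items())

    def replacer(match, url=url):
        img_url = match.group(1)
        return (f'<figure><a href="{url}">'
                f'<img src="{img_url}" srcset="{srcset}" alt="Image from Amazon.com">'
                f'</a></figure>')

    return re.sub(img_tag_pattern, replacer, html_text)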
@@ -39,15 +39,25 @@ def write_article(url):
     text = scraper.get_product_info_and_reviews(url)
     image = list(scraper.images)[0]
-    # prompt_for_ai = "Write a summary of the following product and an overview of people's experiences based on the provided reviews of it as follows. Format it nicely and professionally in HTML:\n\n" + text
-    prompt_for_ai = f"Write an HTML code that includes summary of the following product and an overview of people's experiences based on the provided reviews of it as follows. Underneath the title in small letters write 'This page includes paid Amazon affiliate links' and Include a link to the product {url} at the very end. Also include this image {image} after the second paragraph. Format it nicely and professionally in HTML. :\n\n" + text
-    ai_response = aii.ask_ai(prompt_for_ai, model=llms[1])
-    print(ai_response)
-    html_content = ai_response
-    # prompt_for_ai = f"Take the following HTML code and slightly modify it by converting the names of this product to links to {url}. Also include this image {image} after the first or second paragraph. Return a nice and professional HTML code:\n" + ai_response
-    # html_content = aii.ask_ai(prompt_for_ai, model=llms[1])
-    html_content = replace_img_tag(html_content, scraper.images)
+    # prompt_for_ai = ("Write a summary of the following product and an overview of people's experiences based on the "
+    #                  "provided reviews of it as follows. Format it nicely and professionally in HTML:\n\n") + text
+    prompt_for_ai = "write a succinct summary article about this product. Format it nicely in HTML:\n\n" + text
+    # prompt_for_ai = (f"Write an HTML code that includes a professionally authored article summary of the following "
+    #                  f"product and an overview of people's experiences based on the provided reviews of it as "
+    #                  f"follows. Underneath the title add this tag <p class=\"has-small-font-size\">This page includes "
+    #                  f"paid Amazon affiliate links</p> and Include a link to the product {url} at the very end. Also "
+    #                  f"include this image {image} after the second paragraph. Format it nicely and professionally in "
+    #                  f"HTML. :\n\n") + text
+    html_content = aii.ask_ai(prompt_for_ai, model=llms[1])
+    prompt_for_ai = (f"Take the following HTML code and slightly modify it. Underneath the title add this tag '<p "
+                     f"class=\"has-small-font-size\">This page includes paid Amazon affiliate links</p>'. Include a "
+                     f"link {url} to the product at the end. Also include this image {image} after the first or "
+                     f"second paragraph. Return a nice and professional HTML code:\n") + html_content
+    html_content = aii.ask_ai(prompt_for_ai, model=llms[1])
+    html_content = replace_img_tag(url, html_content, scraper.images)
     print(html_content)
     return html_content
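
For reference, wiring write_article into the Gradio UI mentioned in the commit message would typically look roughly like this; the repo's actual UI code is not shown in the diff, so the component choices here are assumptions:

import gradio as gr

# Minimal sketch: one product URL in, generated HTML article out.
demo = gr.Interface(
    fn=write_article,
    inputs=gr.Textbox(label="Amazon product URL"),
    outputs=gr.HTML(label="Generated article"),
)

demo.launch()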