بحث هذه المدونة الإلكترونية
الأربعاء، 15 نوفمبر 2023
from PIL import Image
import pytesseract
import os
# Let's define a function to load images and extract text using OCR.
def ocr_from_images(image_paths):
    """Run Tesseract OCR on each image and return the extracted text.

    Args:
        image_paths: Iterable of filesystem paths to image files.

    Returns:
        A list of strings, one per input path and in the same order, holding
        the text ``pytesseract.image_to_string`` produced for that image.
    """
    data = []
    for path in image_paths:
        # Open inside a ``with`` block so the underlying file handle is
        # released as soon as OCR for this image has finished, instead of
        # lingering until garbage collection.
        with Image.open(path) as img:
            data.append(pytesseract.image_to_string(img))
    return data
# Directory holding the images uploaded by the user.
image_dir = '/mnt/data/'
# os.listdir() returns entries in arbitrary order; sort the names so the OCR
# results line up with the paths in a reproducible order from run to run.
image_paths = [
    os.path.join(image_dir, f)
    for f in sorted(os.listdir(image_dir))
    if f.endswith('.jpeg')
]
# Perform OCR on all the images and keep the text, one entry per path.
ocr_results = ocr_from_images(image_paths)
# Exponent text appended for the magnitude suffixes a traffic figure may carry:
# 'K' is thousand and 'M' is million.
_SUFFIX_EXPONENTS = {'K': 'e3', 'M': 'e6'}


def _parse_traffic_value(token):
    """Return the number encoded by *token*, or None if it is not a number.

    Accepts plain or decimal figures ('532', '4.7'), figures with thousands
    separators ('12,345') and figures with one trailing 'K' or 'M' suffix
    ('1.2K', '3M'). Anything else yields None rather than raising.
    """
    exponent = ''
    # Only a trailing suffix counts; 'K5' or '1.2KM' must not be mistaken
    # for numbers (they would make float() raise).
    if token[-1:] in _SUFFIX_EXPONENTS:
        exponent = _SUFFIX_EXPONENTS[token[-1]]
        token = token[:-1]
    digits = token.replace(',', '')  # drop thousands separators
    # isdecimal() (not isdigit()) so that only characters float() can parse
    # are let through.
    if not digits.replace('.', '', 1).isdecimal():
        return None
    return float(digits + exponent)


# Since we need to sort by traffic, extract the traffic values together with
# the path of the image each value was read from.
traffic_data = []
for result, path in zip(ocr_results, image_paths):
    # Scan the OCR text line by line for traffic-related lines.
    for line in result.split('\n'):
        if 'Organic Search Traffic' not in line and 'Organic Traffic' not in line:
            continue
        # Record the first numeric token on the line, then move to the next line.
        for part in line.split():
            number = _parse_traffic_value(part)
            if number is not None:
                traffic_data.append((path, number))
                break

# Sort the (path, traffic) pairs by traffic in descending order.
sorted_traffic_data = sorted(traffic_data, key=lambda x: x[1], reverse=True)
# Bare expression: displays the result when run as the last line of a
# notebook cell; it has no effect when the file is run as a script.
sorted_traffic_data
الاشتراك في:
تعليقات الرسالة (Atom)
ليست هناك تعليقات:
إرسال تعليق