البحث في هذه المدونة الإلكترونية

الأربعاء، 15 نوفمبر 2023

import os
import re

from PIL import Image
import pytesseract

# A traffic figure token: plain or decimal ASCII digits with at most one
# trailing "K" (thousand) or "M" (million) suffix, e.g. "950", "12.5K", "3M".
# Anchoring the suffix to the end rejects tokens such as "1K5" or "K12",
# which a strip-the-letter-anywhere check would accept and then either
# misread (float("1e35")) or crash on (float("e312")).
_TRAFFIC_VALUE_RE = re.compile(r"(\d+\.?\d*|\.\d+)([KM]?)", re.ASCII)

# Suffix -> exponent text appended before calling float().
_SUFFIX_EXPONENTS = {"": "", "K": "e3", "M": "e6"}

# A line is considered traffic-related when it contains any of these labels.
_TRAFFIC_LABELS = ("Organic Search Traffic", "Organic Traffic")


def ocr_from_images(image_paths):
    """Run OCR on each image and return the recognized text.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list of strings, one per path, in the same order as
        ``image_paths``.
    """
    texts = []
    for path in image_paths:
        # The context manager closes the underlying file once OCR is done,
        # so processing many images does not accumulate open handles.
        with Image.open(path) as img:
            texts.append(pytesseract.image_to_string(img))
    return texts


def _parse_traffic_value(token):
    """Return ``token`` as a float, or ``None`` if it is not a traffic figure.

    A trailing ``K`` multiplies by one thousand and a trailing ``M`` by one
    million.
    """
    match = _TRAFFIC_VALUE_RE.fullmatch(token)
    if match is None:
        return None
    digits, suffix = match.groups()
    return float(digits + _SUFFIX_EXPONENTS[suffix])


def _first_traffic_value(line):
    """Return the first parseable figure among the whitespace-separated
    tokens of ``line``, or ``None`` when there is none."""
    for token in line.split():
        value = _parse_traffic_value(token)
        if value is not None:
            return value
    return None


# Paths to the images uploaded by the user. The directory listing is sorted
# so that the result order (including ties in the final sort, which is
# stable) is reproducible across runs and platforms.
image_dir = '/mnt/data/'
image_paths = [
    os.path.join(image_dir, name)
    for name in sorted(os.listdir(image_dir))
    if name.endswith('.jpeg')
]

# Perform OCR on all the images and keep the raw text per image.
ocr_results = ocr_from_images(image_paths)

# Collect (path, traffic) pairs. Every traffic-labelled line that contains a
# parseable figure contributes one pair, so an image with several such lines
# appears several times.
traffic_data = []
for result, path in zip(ocr_results, image_paths):
    for line in result.split('\n'):
        if not any(label in line for label in _TRAFFIC_LABELS):
            continue
        value = _first_traffic_value(line)
        if value is not None:
            traffic_data.append((path, value))

# Sort the data by traffic in descending order.
sorted_traffic_data = sorted(traffic_data, key=lambda item: item[1], reverse=True)
sorted_traffic_data

ليست هناك تعليقات:

إرسال تعليق