Introduction¶

Urban Rivers is a conservation organization helping to restore the Chicago River.

Part of the project involves tracking changes in biodiversity attributable to the installation of floating wetlands.
Volunteers have placed and maintain motion-detection cameras (camera traps) along the installations, natural riverbanks, and the existing metal retaining walls on the waterway.

These pictures are available on S3; this notebook downloads a portion of those images for testing with SpeciesNet.

Notes for this run: geofencing removed, MD5 hashes (the mediaID values) used for image file names, the full dataset processed, and working files cleaned up when finished.

This notebook was run on Kaggle's GPUs to generate the detections; the Kaggle version is linked below.
https://www.kaggle.com/code/morescope/urbanrivers-speciesnet-hash-clean-full-nogeo/

Notebook Setup and Required Packages¶

In [1]:
# Data Handling
import pandas as pd
import numpy as np

# IO - getting files and images from MongoDB and S3
from pymongo import MongoClient
from kaggle_secrets import UserSecretsClient
import requests

from concurrent.futures import ThreadPoolExecutor

from pathlib import Path
from PIL import Image
from io import BytesIO

import os
import re
import shutil
import json
import time

# Install speciesnet and related megadetector libraries
!pip install -Uqq speciesnet megadetector-utils

from IPython.display import display
from IPython.display import JSON

from speciesnet import SpeciesNet
import kagglehub

In [2]:
# Run a quick check to see if the GPU is being used
!python -m speciesnet.scripts.gpu_test
*** Running Torch tests ***



Torch version: 2.6.0+cu124

CUDA available (according to PyTorch): True

CUDA version (according to PyTorch): 12.4

CuDNN version (according to PyTorch): 90100

Found 1 CUDA devices:

0: Tesla P100-PCIE-16GB

In [3]:
# Configuration for Multithreading and Batching
num_batches = 10
max_threads = 8
output_root = Path("output")

# Prepare folders
output_root.mkdir(exist_ok=True)
images_root = Path("images")
images_root.mkdir(exist_ok=True)

Access The URIs from S3 through MongoDB¶

In [4]:
# Get the stored mongo uri secret
user_secrets = UserSecretsClient()
mongo_uri = user_secrets.get_secret("MONGO_URI")

# Connect to the MongoDB client
client = MongoClient(mongo_uri)
 
# Access the database and collection
db = client['test']
collection = db['cameratrapmedias'] 
 
# Query the collection for image URLs, metadata, and the second element (index 1) of 'relativePath' as the folder name
data = list(collection.aggregate([
    {
        '$project': {
            '_id': 0,
            'publicURL': 1,
            'timestamp': 1,
            'folderName': { '$arrayElemAt': ['$relativePath', 1] },
            'fileName': 1,
            'mediaID': 1
        }
    },
    # { '$limit': 150 }
]))
 
# Convert the data to a pandas DataFrame for exploration
df = pd.DataFrame(data)

# Export the DataFrame to a CSV file for preview
df.to_csv('ur_test_medias.csv', index=False)
In [5]:
# preview df
df.head()
Out[5]:
mediaID timestamp publicURL fileName folderName
0 fb04201b6417ea917fdd24e1a7415d8a 2024-01-27 13:33:15 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0060.JPG 2024-01-30_prologis_02
1 c112813a5f3b9cec26f95fad982b8d09 2024-01-24 18:56:50 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0001.JPG 2024-01-30_Learnin_platform_camera_test
2 0647380f2d59692f5b2b642312844e9f 2024-01-24 19:01:54 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0002.JPG 2024-01-30_Learnin_platform_camera_test
3 0db73c6c1efb4968c04a47e418ebeefb 2024-01-24 19:03:05 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0004.JPG 2024-01-30_Learnin_platform_camera_test
4 31fc53de29056b4dd8bc7b1804617f00 2024-01-24 19:04:19 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0006.JPG 2024-01-30_Learnin_platform_camera_test

We are going to create a column with a file name for saving each image¶

In [6]:
# Sanitize a string for use as a file name
def make_filename(s):
    # s = s.lower()
    s = re.sub(r'[^\w\s.-]', '', s) # remove special characters except dash or underscore or period
    s = re.sub(r'\s+', '_', s) # replace whitespace with underscore
    return s

# Combine the folder name (second element of relativePath) with the fileName
df['imageName'] = df['folderName'] + '--' + df['fileName']
df['imageName'] = df['imageName'].apply(make_filename)

df.head()
Out[6]:
mediaID timestamp publicURL fileName folderName imageName
0 fb04201b6417ea917fdd24e1a7415d8a 2024-01-27 13:33:15 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0060.JPG 2024-01-30_prologis_02 2024-01-30_prologis_02--SYFW0060.JPG
1 c112813a5f3b9cec26f95fad982b8d09 2024-01-24 18:56:50 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0001.JPG 2024-01-30_Learnin_platform_camera_test 2024-01-30_Learnin_platform_camera_test--SYFW0...
2 0647380f2d59692f5b2b642312844e9f 2024-01-24 19:01:54 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0002.JPG 2024-01-30_Learnin_platform_camera_test 2024-01-30_Learnin_platform_camera_test--SYFW0...
3 0db73c6c1efb4968c04a47e418ebeefb 2024-01-24 19:03:05 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0004.JPG 2024-01-30_Learnin_platform_camera_test 2024-01-30_Learnin_platform_camera_test--SYFW0...
4 31fc53de29056b4dd8bc7b1804617f00 2024-01-24 19:04:19 https://urbanriverrangers.s3.amazonaws.com/ima... SYFW0006.JPG 2024-01-30_Learnin_platform_camera_test 2024-01-30_Learnin_platform_camera_test--SYFW0...

Now that we have a connection to the MongoDB server and access to the URLs, let's use them to download the images.

Download Images¶

In [7]:
%%time
# Make sure the output and image directories exist (already created above)
output_root.mkdir(exist_ok=True)
images_root.mkdir(exist_ok=True)

# Helper to resize an image to a fixed height while keeping its aspect ratio (so any later top/bottom cropping is consistent)
def resize_to_height(image, target_height=256):
    og_width, og_height = image.size
    new_width = int(og_width * (target_height / og_height))
    return image.resize((new_width, target_height))

# Create a tool for downloading and processing images
def process_row(row, dest_folder):
    url = row['publicURL']
    filename = f"{row['mediaID']}.jpg"
    # Build the destination path for the saved image
    dest = dest_folder / filename

    try:
        # Download image to memory
        response = requests.get(url)
        response.raise_for_status()

        # Open and process the image
        image = Image.open(BytesIO(response.content)).convert("RGB")
        image = resize_to_height(image, target_height=256)
        image.save(dest, format="JPEG", quality=85)
        
    except Exception as e:
        print(f"failed to process{filename}: {e}")

# Optional subsets for testing (the full run below uses df)
df_test = df[44410:44460]          # 50 images with some known animal detections (e.g., a rat)
df_big_chunk = df[0:10000]         # first 10,000 images
df_bigger_chunk = df[10001:60001]  # next ~50k
df_remaining = df[60002:]          # the rest

# Process Batches
for batch_idx, df_chunk in enumerate(np.array_split(df, num_batches)):  # swap df for df_test to run a small test batch
    batch_folder = images_root / f'batch_{batch_idx}'
    batch_folder.mkdir(exist_ok=True)

    print(f'Processing batch {batch_idx + 1} / {num_batches} with {len(df_chunk)} images...')

    start = time.time()
    
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        executor.map(lambda row: process_row(row, batch_folder), [row for _, row in df_chunk.iterrows()])

    end = time.time()
    print(f"Batch {batch_idx+1} took {end - start:.2f} seconds.")
        
print(f'{len(df)} Images Downloaded and Resized')
/usr/local/lib/python3.11/dist-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.
  return bound(*args, **kwds)
Processing batch 1 / 10 with 10517 images...
failed to process23f3019217b595aea2141e9e1352c58d.jpg: 503 Server Error: Service Unavailable for url: https://urbanriverrangers.s3.amazonaws.com/images/2024/2024-02-01_Bubbly_spypoint_garden/DCIM/100DSCIM/HDPH5311.JPG
Batch 1 took 2563.99 seconds.
Processing batch 2 / 10 with 10517 images...
Batch 2 took 2021.54 seconds.
Processing batch 3 / 10 with 10517 images...
Batch 3 took 2908.26 seconds.
Processing batch 4 / 10 with 10516 images...
Batch 4 took 2309.73 seconds.
Processing batch 5 / 10 with 10516 images...
Batch 5 took 2567.26 seconds.
Processing batch 6 / 10 with 10516 images...
Batch 6 took 2795.10 seconds.
Processing batch 7 / 10 with 10516 images...
Batch 7 took 2497.84 seconds.
Processing batch 8 / 10 with 10516 images...
Batch 8 took 2714.70 seconds.
Processing batch 9 / 10 with 10516 images...
Batch 9 took 2181.21 seconds.
Processing batch 10 / 10 with 10516 images...
Batch 10 took 2252.93 seconds.
105163 Images Downloaded and Resized
CPU times: user 5h 1min 38s, sys: 39min 25s, total: 5h 41min 3s
Wall time: 6h 53min 32s
In [8]:
# Uncomment and run this if the images need to be redone
# !rm images -r
# !rm docs.zip
# %lsmagic

Running Species Net on the Full Dataset¶

Now that we have all of the images downloaded (19.5 GB), let's run SpeciesNet.

Note: there may be a better way to do this by streaming bytes directly from S3 rather than saving resized copies to disk, but I haven't worked that part out yet.
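For reference, here is a minimal sketch of fetching an object's bytes straight from S3 into memory with boto3, instead of going through the public HTTPS URL with requests. The bucket name and key layout are taken from the URLs in the download log above; anonymous (unsigned) access is an assumption, and this isn't wired into the pipeline since SpeciesNet's instances still expect file paths on disk.

# Sketch only: read an image's bytes directly from S3 (assumes the bucket allows anonymous access)
import boto3
from botocore import UNSIGNED
from botocore.config import Config
from io import BytesIO
from PIL import Image

s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))

def fetch_image_from_s3(key, bucket="urbanriverrangers"):
    # Download the object body into memory and open it as a PIL image
    obj = s3.get_object(Bucket=bucket, Key=key)
    return Image.open(BytesIO(obj["Body"].read())).convert("RGB")

# Example key copied from the download log above (illustration only):
# img = fetch_image_from_s3("images/2024/2024-02-01_Bubbly_spypoint_garden/DCIM/100DSCIM/HDPH5311.JPG")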

We're going to try a multithreaded, chunked approach¶

In [9]:
def print_predictions(predictions_dict: dict) -> None:
    print("Predictions:")
    for prediction in predictions_dict["predictions"][0:1]:
        print(prediction["filepath"], "=>", prediction["prediction"])

Download Model¶

In [10]:
# Choose the folder we're going to download the model to
model_path = '/content/models'
os.makedirs(model_path, exist_ok=True)

# Download the model (it will go to a folder like /kaggle/input/...)
download_path = kagglehub.model_download('google/speciesnet/PyTorch/v4.0.1a',
                                          force_download=True)

print('Model downloaded to temporary folder: {}'.format(download_path))

# List the contents of the downloaded directory to identify the actual files/subdirectories
model_files = os.listdir(download_path)

# Copy the contents of the model file to our destination folder
for item_name in model_files:
    source_path = os.path.join(download_path, item_name)
    destination_path = os.path.join(model_path, item_name)
    if os.path.isfile(source_path):
        shutil.copy2(source_path, destination_path)
    elif os.path.isdir(source_path):
        shutil.copytree(source_path, destination_path, dirs_exist_ok=True)

print('{} files copied to: {}'.format(len(model_files),model_path))
Model downloaded to temporary folder: /kaggle/input/speciesnet/pytorch/v4.0.1a/1
6 files copied to: /content/models
In [11]:
# Load the model (v4.0.1a) from the local folder
model = SpeciesNet(model_path)

print('Model Loaded')
Model Loaded
In [12]:
# Build the request payload as a list of dicts (SpeciesNet's 'instances' format)
def create_instances(batch_folder):
    image_paths = [f'{batch_folder}/{f}' for f in os.listdir(batch_folder) if f.lower().endswith('.jpg')]

    instances = []
    for image_path in image_paths:
        instances.append({
            'filepath': image_path
        })

    # Sanity check: show the first instance
    print(instances[0:1])

    return instances


for batch_index in range(len(os.listdir(images_root))):
    instances = create_instances(f'{images_root}/batch_{batch_index}')

    # make the predictions and get a sense of how long it would take
    %time predictions_dict = model.predict(instances_dict={"instances": instances})

    print_predictions(predictions_dict) # show the first prediction of each batch

    # Save the dict to the batch folder
    with open(f'{images_root}/batch_{batch_index}/predictions_dict_{batch_index}.json', 'w') as f:
        json.dump(predictions_dict, f, indent=2)

    print(f'predictions_dict_{batch_index}.json saved to {images_root}/batch_{batch_index}')
[{'filepath': 'images/batch_0/c89e327383d91bdaadda59e65c57eec8.jpg'}]
CPU times: user 36min 8s, sys: 15.5 s, total: 36min 23s
Wall time: 17min 35s
Predictions:
images/batch_0/c89e327383d91bdaadda59e65c57eec8.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_0.json saved to images/batch_0
[{'filepath': 'images/batch_1/a09ebf60205fc899468da21db62b9ab2.jpg'}]
CPU times: user 35min 23s, sys: 21.5 s, total: 35min 44s
Wall time: 17min 21s
Predictions:
images/batch_1/a09ebf60205fc899468da21db62b9ab2.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_1.json saved to images/batch_1
[{'filepath': 'images/batch_2/0808735130905c331d4fde461edae677.jpg'}]
CPU times: user 40min 47s, sys: 22.7 s, total: 41min 10s
Wall time: 19min 56s
Predictions:
images/batch_2/0808735130905c331d4fde461edae677.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_2.json saved to images/batch_2
[{'filepath': 'images/batch_3/05fe979865464a7deab591234e216cce.jpg'}]
CPU times: user 37min 2s, sys: 21.8 s, total: 37min 24s
Wall time: 18min 2s
Predictions:
images/batch_3/05fe979865464a7deab591234e216cce.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_3.json saved to images/batch_3
[{'filepath': 'images/batch_4/ba7d7fa46db57412d5b6266977c3d150.jpg'}]
CPU times: user 40min 50s, sys: 23 s, total: 41min 13s
Wall time: 19min 57s
Predictions:
images/batch_4/ba7d7fa46db57412d5b6266977c3d150.jpg => b1352069-a39c-4a84-a949-60044271c0c1;aves;;;;;bird
predictions_dict_4.json saved to images/batch_4
[{'filepath': 'images/batch_5/91876d0caa6c779949f5fc03178ca3bc.jpg'}]
CPU times: user 42min 12s, sys: 19.6 s, total: 42min 32s
Wall time: 20min 27s
Predictions:
images/batch_5/91876d0caa6c779949f5fc03178ca3bc.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_5.json saved to images/batch_5
[{'filepath': 'images/batch_6/755e554dacb9e7a3065f7ddf0bacee44.jpg'}]
CPU times: user 38min, sys: 23.6 s, total: 38min 24s
Wall time: 18min 33s
Predictions:
images/batch_6/755e554dacb9e7a3065f7ddf0bacee44.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_6.json saved to images/batch_6
[{'filepath': 'images/batch_7/c1d9861e149921a8a6d5678e7871ff1f.jpg'}]
CPU times: user 40min 7s, sys: 16.7 s, total: 40min 24s
Wall time: 19min 30s
Predictions:
images/batch_7/c1d9861e149921a8a6d5678e7871ff1f.jpg => f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank
predictions_dict_7.json saved to images/batch_7
[{'filepath': 'images/batch_8/75899b821aa01f10169e7a32b302667b.jpg'}]
CPU times: user 37min 13s, sys: 17.8 s, total: 37min 31s
Wall time: 18min 3s
Predictions:
images/batch_8/75899b821aa01f10169e7a32b302667b.jpg => 990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human
predictions_dict_8.json saved to images/batch_8
[{'filepath': 'images/batch_9/14718d070c52d81c2a05c095d5511dcc.jpg'}]
CPU times: user 36min 13s, sys: 18.3 s, total: 36min 32s
Wall time: 17min 35s
Predictions:
images/batch_9/14718d070c52d81c2a05c095d5511dcc.jpg => 990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human
predictions_dict_9.json saved to images/batch_9
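The prediction strings above look like semicolon-delimited taxonomy records: a UUID followed by class, order, family, genus, species, and a common name, with empty fields when a level doesn't apply (e.g. 'blank'). A small sketch of splitting one apart, based only on the format visible in this output:

# Sketch: split a prediction string into labelled taxonomy fields
# (field names inferred from the output above)
def parse_prediction(pred: str) -> dict:
    fields = ["uuid", "class", "order", "family", "genus", "species", "common_name"]
    return dict(zip(fields, pred.split(";")))

parse_prediction("990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human")
# -> {'uuid': '990ae9dd-...', 'class': 'mammalia', ..., 'common_name': 'human'}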
In [13]:
# Concatenate all the per-batch JSON files into one master file
output_file = output_root / "predictions_dict_master.json"

# Initialize the master predictions list
master_predictions = []

# Loop through files matching the pattern
for json_file in sorted(images_root.glob("batch_*/predictions_dict_*.json")):
    with open(json_file, "r") as f:
        data = json.load(f)
        if "predictions" in data:
            master_predictions.extend(data["predictions"])  # Concatenate predictions!
        else:
            print(f"{json_file} missing 'predictions' key")

# Write the combined predictions to a new file
with open(output_file, "w") as f:
    json.dump({"predictions": master_predictions}, f, indent=2)

print(f"Combined {len(master_predictions)} predictions into {output_file}")
Combined 104937 predictions into output/predictions_dict_master.json
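As a quick sanity check on the combined file, the master JSON can be loaded into pandas to tally the most common predicted labels. This sketch relies only on the {"predictions": [{"filepath": ..., "prediction": ...}]} structure written above.

# Sketch: tally the most common predicted labels in the master file
import json
import pandas as pd

with open("output/predictions_dict_master.json") as f:
    preds = pd.DataFrame(json.load(f)["predictions"])

# The last semicolon-delimited field is the common-name label (e.g. 'blank', 'bird', 'human')
preds["label"] = preds["prediction"].str.split(";").str[-1]
print(preds["label"].value_counts().head(10))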

Let's generate a preview report from the predictions¶

In [14]:
# Create a docs folder for previewing the images
output_path = '/kaggle/working/output/docs'
os.makedirs(output_path, exist_ok=True)

# set --num_images_to_sample to -1 to render all images
!python -m megadetector.postprocessing.postprocess_batch_results /kaggle/working/output/predictions_dict_master.json /kaggle/working/output/docs --num_images_to_sample 2000 --confidence_threshold 0.5
Loading results from /kaggle/working/output/predictions_dict_master.json

This appears to be a SpeciesNet output file, converting to MD format

Writing temporary results to /tmp/megadetector_temp_files/c658fd2c-3931-11f0-8f1e-0242ac130202.json

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Converting results to dataframe

Finished loading MegaDetector results for 104937 images from /kaggle/working/output/predictions_dict_master.json

Assigning images to rendering categories

100%|████████████████████████████████| 104937/104937 [00:03<00:00, 27707.61it/s]

Finished loading and preprocessing 104937 rows from detector output, predicted 25288 positives.

100%|███████████████████████████████████████| 2000/2000 [00:25<00:00, 77.01it/s]

Rendered 2000 images (of 2000) in 25.97 seconds (0.01 seconds per image)

Generating classification category report

This appears to be a SpeciesNet output file, converting to MD format

Writing temporary results to /tmp/megadetector_temp_files/e4a28d20-3931-11f0-8f1e-0242ac130202.json

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Warning: creating fake detection for non-blank whole-image classification

Finished writing html to /kaggle/working/output/docs/index.html

Let's zip the folder so we can easily download it¶

In [15]:
shutil.make_archive('/kaggle/working/output/docs', 'zip', '/kaggle/working/output/docs')

# and finally remove the original folder now that it has been archived
shutil.rmtree('/kaggle/working/output/docs')  # Deletes the folder
In [16]:
# Remove the downloaded image directories; they don't need to be kept in the final output
shutil.rmtree('/kaggle/working/images')

print("Files cleaned up")
Files cleaned up