Cox_AMIL / Git / [0fdc30] /tma_utils/tma

Models:
RichardZick/
Cox_AMIL
Downloads: 1
[0fdc30]: / tma_utils / tma_utils.py
History
Download this file
144 lines (103 with data), 5.0 kB

import re
from typing import List, Optional, Tuple

import numpy as np
import openslide
import pandas as pd
import pyvips


def extract_core(slide: openslide.OpenSlide,
                 df: pd.DataFrame,
                 level: int = 0,
                 core_id: Optional[int] = None,
                 core_label: Optional[str] = None) -> Optional[np.ndarray]:
    """Extracts core as numpy array from TMA slide.

    The region that is read is the bounding box of all annotation points
    (columns ``x`` and ``y`` of ``df``) belonging to the requested core.

    Args:
        slide (openslide.OpenSlide): TMA slide.
        df (pd.DataFrame): DataFrame with core annotations. Must provide the
            columns ``core_id``, ``core_label``, ``x`` and ``y``.
        level (int, optional): Level to sample. Defaults to 0.
        core_id (int, optional): Core identifier (e.g. 123). Defaults to None.
        core_label (str, optional): Core label (e.g. P123_1). Defaults to None.

    Raises:
        ValueError: If core_id and core_label do not match when both are provided.
        ValueError: If neither core_id nor core_label are provided.

    Returns:
        Optional[np.ndarray]: ``np.asarray`` of the region returned by
        ``slide.read_region``, or None (after printing a notice) when ``df``
        holds no annotations for the requested core.
    """
    if core_id is None and core_label is None:
        raise ValueError('Expected either core_id or core_label.')

    columns = ['core_id', 'core_label', 'x', 'y']
    if core_id is not None:
        XY = df.loc[df['core_id'] == core_id, columns]
        # When both identifiers are supplied they have to describe the same
        # core; this must be checked before falling back to a single key,
        # otherwise the mismatch is never detected.
        if core_label is not None and (XY['core_label'] != core_label).any():
            raise ValueError('core_id and core_label do not match')
        requested = f'core_id {core_id}'
    else:
        XY = df.loc[df['core_label'] == core_label, columns]
        requested = f'core_label {core_label}'

    if XY.empty:
        # Best-effort behaviour: report the missing core and hand back None.
        print(f'No annotations for {requested} available.')
        return None

    xs, ys = XY['x'].to_numpy(), XY['y'].to_numpy()
    x_min, y_min = int(xs.min()), int(ys.min())
    width, height = int(np.ptp(xs)), int(np.ptp(ys))

    # The top-left corner is passed in annotation coordinates unchanged,
    # while the extent is scaled down to the pixel grid of the chosen level.
    downsample = slide.level_downsamples[level]
    location = (x_min, y_min)
    size = (int(width / downsample), int(height / downsample))
    core = slide.read_region(location=location, size=size, level=level)

    return np.asarray(core)



def core_2_vips(core: np.ndarray) -> pyvips.Image:
    """Converts numpy array to pyvips image.

    Args:
        core (np.ndarray): Numpy array shaped ``(height, width, bands)`` or,
            for a single-band image, ``(height, width)``. The buffer is
            handed to pyvips as ``'uchar'``, so the array is expected to hold
            8-bit unsigned values (NOTE(review): dtype is not checked here).

    Returns:
        pyvips.Image: Pyvips image.
    """
    # The raw buffer is interpreted as densely packed rows; a strided view
    # (e.g. ``core[:, :, :3]``) must be packed first. This is a no-op for
    # arrays that are already C-contiguous.
    pixels = np.ascontiguousarray(core)

    if pixels.ndim == 2:
        height, width = pixels.shape
        bands = 1
    else:
        height, width, bands = pixels.shape

    return pyvips.Image.new_from_memory(pixels, width, height, bands, 'uchar')



def check_patientID_range(slide_id: int, low: int, high: int, df: pd.DataFrame) -> bool:
    """Checks whether all patient IDs on a TMA are in the correct range.

    Verifies that no other labels are on the TMA. The patient ID is the
    number between the leading ``P`` and the first ``_`` of each core label
    (e.g. ``P123_1`` -> 123). The result is also printed.

    Args:
        slide_id (int): Slide identifier.
        low (int): Lowest patient ID (inclusive).
        high (int): Highest patient ID (inclusive).
        df (pd.DataFrame): Dataframe with core annotations. Must provide the
            columns ``slide_id`` and ``core_label``.

    Raises:
        ValueError: If ``df`` holds no core labels for ``slide_id`` or a label
            does not contain a parsable patient ID.

    Returns:
        bool: True if all IDs are in the correct range.
    """
    labels = df.loc[df['slide_id'] == slide_id, 'core_label']
    patient_ids = {int(re.split(r'P|_', label)[1]) for label in labels}

    if not patient_ids:
        # Same exception type min()/max() would raise on an empty set, but
        # with a message that names the actual problem.
        raise ValueError(f'No core labels found for slide_id {slide_id}.')

    correct = bool(min(patient_ids) >= low and max(patient_ids) <= high)
    print(f'slide_id {slide_id}: {correct}')
    return correct



def check_coreID_range(slide_id: int, df: pd.DataFrame, min: int = 1500, max: int = 6000, verbose: bool = False) -> Tuple[List, List, List[Tuple], List[Tuple]]:
    """Checks the size of the core annotations.

    Cores that are too large indicate annotation mistakes (e.g. 2 cores apart
    with same label). The size of a core is the peak-to-peak extent of its
    annotation points along ``x`` and ``y``.

    Note: the parameters ``min`` and ``max`` shadow the builtins of the same
    name inside this function; the names are kept for backward compatibility.

    Args:
        slide_id (int): Slide identifier
        df (pd.DataFrame): Dataframe with core annotations. Must provide the
            columns ``slide_id``, ``core_id``, ``x`` and ``y``.
        min (int, optional): Minimum core size. Defaults to 1500.
        max (int, optional): Maximum core size. Defaults to 6000.
        verbose (bool, optional): Print a report of the suspicious cores.
            Defaults to False.

    Returns:
        tuple: ``(x_ranges, y_ranges, too_big, too_small)``. The first two
        lists hold the x and y extent of every core on the slide, in order of
        first appearance. The last two hold ``(core_id, x_range, y_range)``
        tuples for cores where either extent exceeds ``max`` or falls below
        ``min``, respectively.
    """
    x_ranges, y_ranges = [], []
    too_big, too_small = [], []
    subdf = df[df.slide_id == slide_id]

    # One pass over the slide's annotations instead of re-filtering the frame
    # for every core. sort=False keeps cores in order of first appearance;
    # rows without a core_id are left out.
    for core_id, points in subdf.groupby('core_id', sort=False):
        x_range = np.ptp(points['x'].to_numpy())
        y_range = np.ptp(points['y'].to_numpy())
        x_ranges.append(x_range)
        y_ranges.append(y_range)
        if x_range > max or y_range > max:
            too_big.append((core_id, x_range, y_range))
        if x_range < min or y_range < min:
            too_small.append((core_id, x_range, y_range))

    if verbose:
        for kind, suspects in (('large', too_big), ('small', too_small)):
            if len(suspects) > 0:
                print(f'\nSlide {slide_id} with {len(suspects)} suspicously {kind} core annotations.')
                for core_id, x_range, y_range in suspects:
                    print(f'Core {core_id}, x_range {x_range}, y_range {y_range}')
            else:
                print(f'\nSlide {slide_id} without suspicously {kind} core annotations.')

    return x_ranges, y_ranges, too_big, too_small