# Source code for pythologist.interface

import pandas as pd
import numpy as np
from pythologist.measurements import Measurement
from pythologist_image_utilities import watershed_image, map_image_ids
import sys, os, io
import imageio
from PIL import Image
from pythologist import SubsetLogic as SL
from scipy.ndimage import gaussian_filter

class SegmentationImageOutput(pd.DataFrame):
    """
    The Segmentation Image Output class 

    A DataFrame whose rows each carry one image.  ``write_to_path`` reads the
    'project_name', 'sample_name', 'frame_name' and 'image' columns.

    Args:
        verbose (bool): optional keyword, stored on the instance as ``verbose``
            (default False).  All other arguments go to ``pd.DataFrame``.
    """
    # Listed here so pandas carries the attribute over to derived frames
    _metadata = ['verbose']
    def __init__(self,*args, **kw):
        # pd.DataFrame.__init__ does not accept a 'verbose' keyword, so it has
        # to be removed before delegating
        verbose = kw.pop('verbose',False)
        super(SegmentationImageOutput, self).__init__(*args, **kw) 
        self.verbose = verbose
    @property
    def _constructor(self):
        return SegmentationImageOutput
    def write_to_path(self,path,suffix='',format='png',overwrite=False):
        """
        Output the data the dataframe's 'image' column to a directory structured by project->sample and named by frame

        Args:
            path (str): Where to write the directory of images
            suffix (str): for labeling the images you write
            format (str): default 'png' format to write the file
            overwrite (bool): default False. if true can overwrite files in the path

        Raises:
            ValueError: if path already exists and overwrite is not truthy

        Modifies:
            Creates path folder if necessary and writes images to path
        """
        # Any falsy overwrite (False, 0, None) protects an existing path
        if os.path.exists(path) and not overwrite: raise ValueError("Error: use overwrite=True to overwrite images")
        os.makedirs(path,exist_ok=True)
        for _,r in self.iterrows():
            spath = os.path.join(path,r['project_name'],r['sample_name'])
            os.makedirs(spath,exist_ok=True)
            # File name is <frame_name>[_<suffix>].<format>
            stem = r['frame_name'] if suffix == '' else r['frame_name']+'_'+suffix
            imageio.imwrite(os.path.join(spath,stem+'.'+format), r['image'],format=format)

class SegmentationImages(Measurement):
    """
    Class suitable for generating image outputs

    ``_preprocess_dataframe`` produces one row per frame: the frame columns of
    the source cell dataframe plus a 'shape' column.  The methods below read the
    'sample_id', 'frame_id' and 'shape' columns and reach the cell data through
    ``self.cdf`` (presumably supplied by the Measurement base class, which is
    not shown in this file).
    """
    def __init__(self,*args,**kwargs):
        """
        Args:
            verbose (bool): output more details if True
        """
        super(SegmentationImages,self).__init__(*args,**kwargs)
        # Lazily filled by get_segmentation_maps / get_coordinates
        self._edge_map_cache = None
        self._cell_map_cache = None
        self._coordinates = None
    @staticmethod
    def _preprocess_dataframe(cdf,*args,**kwargs):
        """
        Build the per-frame table: unique frame columns of cdf plus each frame's shape

        Args:
            cdf: cell dataframe exposing ``frame_columns`` and a ``db`` with ``get_sample``
            verbose (bool): optional keyword, progress messages go to stderr if True

        Returns:
            pd.DataFrame: the frame columns with a 'shape' column added
        """
        verbose = kwargs.get('verbose')
        base = pd.DataFrame(cdf).copy().loc[:,cdf.frame_columns].drop_duplicates()
        data = []
        if verbose: sys.stderr.write("Reading image names/ids and sizes\n")
        for sample_id in base['sample_id'].unique().tolist():
            if verbose: sys.stderr.write("Reading sample "+str(sample_id)+"\n")
            s = cdf.db.get_sample(sample_id)
            for frame_id in base.loc[base['sample_id']==sample_id,'frame_id']:
                data.append([sample_id,frame_id,s.get_frame(frame_id).shape])
        shapes = pd.DataFrame(data,columns=['sample_id','frame_id','shape'])
        return base.merge(shapes,on=['sample_id','frame_id'])


    def get_coordinates(self):
        """
        List the pixel coordinates of every id found in every image column

        Every column other than the frame columns and 'shape' is treated as an
        image and passed to ``map_image_ids``.  The result is cached on the
        instance.

        Returns:
            pd.DataFrame: frame columns, 'shape', 'image_type', 'cell_index' and
                'coords' (a list of (x,y) tuples)
        """
        if self._coordinates is not None: return self._coordinates
        index_columns = list(self.cdf.frame_columns)+['shape']
        # stack() appends one unnamed index level after index_columns, and
        # reset_index() names it 'level_<position>'
        stacked_level = 'level_'+str(len(index_columns))
        df = self.set_index(index_columns).stack().\
            reset_index().rename(columns={stacked_level:'image_type',0:'image'}).\
            set_index(index_columns+['image_type'])
        imgs = []
        for i,r in df.iterrows():
            if self.verbose: sys.stderr.write("Extracting coordinates from "+str(list(i))+"\n")
            left = pd.DataFrame([i],columns=df.index.names)
            left['_key'] = 1
            img=map_image_ids(r['image']).groupby('id').apply(lambda x: list(zip(*x[['x','y']].apply(tuple).tolist())))
            img = img.reset_index().rename(columns={'id':'cell_index',0:'coords'})
            img['_key'] = 1
            # constant '_key' gives a cross join: the image's labels on every id row
            img = left.merge(img,on='_key').drop(columns='_key')
            imgs.append(img)
        imgs = pd.concat(imgs)
        self._coordinates = imgs
        return imgs

    def get_segmentation_map_images(self,type='edge',subset_logic=None,color=None,watershed_steps=0,blank=(0,0,0,255)):
        """
        Draw one image per frame from the edge or cell segmentation map

        Args:
            type (str): 'edge' or 'cell', forwarded to get_segmentation_maps
            subset_logic: if given, passed to ``self.cdf.subset`` and only the
                selected cells are drawn
            color (tuple): if given, drawn pixels get this color and the image has
                one channel per color entry; if None the image holds cell_index values
            watershed_steps (int): if > 0, passed as ``steps`` to watershed_image
            blank (tuple): color for undrawn pixels, truncated to len(color)

        Returns:
            pd.DataFrame: this table's columns plus an 'image' column.  When there
                is nothing to draw the image is a 2D array of zeros, even if a
                color was requested.
        """
        if self.verbose: sys.stderr.write("getting segmap "+str(type)+"\n")
        ems = self.get_segmentation_maps(type=type)
        subset = self.cdf
        if subset_logic is not None: 
            subset = self.cdf.subset(subset_logic)
            ems = ems.merge(subset.loc[:,subset.frame_columns+['cell_index']],on=subset.frame_columns+['cell_index'])
        nothing_selected = subset.shape[0] == 0
        if color is not None:
            # the blank pixel must have as many channels as the drawing color
            blank = tuple(list(blank)[0:len(color)])
        imgs = []
        for i,r in self.iterrows():
            imsize = r['shape']
            if nothing_selected: # case where there is nothing to do
                if self.verbose: sys.stderr.write("Empty image for this phenotype subset\n")
                imgs.append(list(r)+[np.zeros(imsize)])
                continue
            edfsub = ems.loc[ems['frame_id']==r['frame_id']].copy().set_index(list(self.columns))

            # Cross join of all x and all y gives every pixel; pixels without a
            # map entry are filled with 0
            fullx = pd.DataFrame({'x':list(range(0,imsize[1]))})
            fullx['_key']=1
            fully = pd.DataFrame({'y':list(range(0,imsize[0]))})
            fully['_key']=1
            full = fullx.merge(fully,on='_key').merge(edfsub,on=['x','y'],how='left').fillna(0)
            img = np.array(full.pivot(columns='x',index='y',values='cell_index').astype(int))
            if map_image_ids(img).shape[0] == 0:
                # There is nothing for us to draw with this phenotype and image
                imgs.append(list(r)+[np.zeros(imsize)])
                continue
            if watershed_steps > 0:
                # get the zero and nonzero components
                mid = map_image_ids(img,remove_zero=False)
                midzero = list(zip(*mid.query('id==0').copy()[['x','y']].apply(tuple).tolist()))
                mid = list(zip(*mid.query('id!=0').copy()[['x','y']].apply(tuple).tolist()))
                img = watershed_image(np.array(img),mid,midzero,steps=watershed_steps)
            if color is not None:
                # we need to make a new image thats colored in
                fresh = np.zeros(list(imsize)+[len(color)]).astype(int)
                fresh[:][:] = blank
                # get our coordinates as (y,x) rows, then index rows/columns at once
                coords = np.array(list(zip(*map_image_ids(img)[['y','x']].apply(tuple).tolist())))
                fresh[tuple([*coords.T])] = color
                img = fresh
            imgs.append(list(r)+[img])
        imgs = pd.DataFrame(imgs,columns=list(self.columns)+['image'])
        return imgs


    def build_segmentation_image(self,schema,background=(0,0,0,0)):
        """
        Put together an image.  Defined by a list of layers with RGBA colors

        Make the schema example
        
        |    schema = [
        |        {'subset_logic':SL(phenotypes=['SOX10+']),
        |         'edge_color':(31, 31, 46,255),
        |         'watershed_steps':0,
        |         'fill_color':(51, 51, 77,255)
        |        },
        |        {'subset_logic':SL(phenotypes=['CD8+'],scored_calls={'PD1':'+'}),
        |         'edge_color':(255,0,0,255),
        |         'watershed_steps':1,
        |         'fill_color':(0,0,0,255)
        |        },
        |        {'subset_logic':SL(phenotypes=['CD8+'],scored_calls={'PD1':'-'}),
        |         'edge_color':(255,0,255,255),
        |         'watershed_steps':1,
        |         'fill_color':(0,0,255,255)
        |        }
        |    ]
        |    imgs = imageaccess.build_segmentation_image(schema,background=(0,0,0,255))
        

        Args:
            schema (list): a list of layers (see example above)
            background (tuple): a color RGBA 0-255 tuple for the background color
        Returns:
            SegmentationImageOutput: an output suitable for writing images
        """
        cumulative = self.copy()
        def _set_blank(img,blank):
            img[:][:] = blank
            return img
        cumulative['merged'] = cumulative.apply(lambda x: 
            _set_blank(np.zeros(list(x['shape'])+[4]),background)
            ,1)
        for layer in schema:
            if self.verbose: sys.stderr.write("Calculating layer "+str(layer)+"\n")
            images  = self.get_outline_images(subset_logic=layer['subset_logic'],
                                              edge_color=layer['edge_color'],
                                              watershed_steps=layer['watershed_steps'],
                                              fill_color=layer['fill_color'])
            # 'old' is the picture so far, 'merged' (from images) is the new
            # layer, which gets pasted on top
            cumulative = cumulative.rename(columns={'merged':'old'})
            cumulative = cumulative.merge(images,on=list(self.columns))
            cumulative['new'] = cumulative.apply(lambda x: _merge_images(x['merged'],x['old']),1)
            cumulative = cumulative.drop(columns=['old','merged']).rename(columns={'new':'merged'})
        cumulative = cumulative.rename(columns={'merged':'image'})
        return SegmentationImageOutput(cumulative)

    def get_outline_images(self,subset_logic=None,edge_color=(0,0,255,255),fill_color=(135,206,250,255),watershed_steps=1):
        """
        Draw filled cells with their edges pasted on top, one image per frame

        Args:
            subset_logic: forwarded to get_segmentation_map_images
            edge_color (tuple): color of the edge pixels; a 3-tuple gets 255 appended
            fill_color (tuple): color of the cell pixels; a 3-tuple gets 255 appended
            watershed_steps (int): used for the edge image only

        Returns:
            pd.DataFrame: this table's columns plus a 'merged' image column
        """
        if len(edge_color) == 3: edge_color = tuple(list(edge_color)+[255])
        if len(fill_color) == 3: fill_color = tuple(list(fill_color)+[255])
        edge_images = self.get_segmentation_map_images(type='edge',subset_logic=subset_logic,color=edge_color,blank=(0,0,0,0),watershed_steps=watershed_steps).\
            rename(columns={'image':'edge'})
        if self.verbose: sys.stderr.write("reading cells\n")
        cell_images = self.get_segmentation_map_images(type='cell',subset_logic=subset_logic,color=fill_color,blank=(0,0,0,0)).\
            rename(columns={'image':'cell'})
        if self.verbose: sys.stderr.write("merge edge and cell\n")
        v = edge_images.merge(cell_images,on=list(self.columns))
        v['merged'] = v.apply(lambda x: _merge_images(x['edge'],x['cell']),1)
        return v.drop(columns=['cell','edge'])


    def get_segmentation_maps(self,type='edge'):
        """
        Collect the edge or cell map of every frame into one table (cached)

        Args:
            type (str): 'edge' uses each frame's ``edge_map()``, 'cell' its ``cell_map()``

        Returns:
            pd.DataFrame: this table's columns joined to the map rows, restricted
                to the cells present in ``self.cdf``

        Raises:
            ValueError: if type is neither 'edge' nor 'cell'
        """
        # Reject a bad type before doing any per-frame work
        if type not in ('edge','cell'): raise ValueError('edge or cell')
        if type == 'edge' and self._edge_map_cache is not None: return self._edge_map_cache
        if type == 'cell' and self._cell_map_cache is not None: return self._cell_map_cache
        if self.verbose: sys.stderr.write("The "+str(type)+" map has not been calculated yet. ... computing.\n")
        outputs = self.apply_frames(lambda x: x.edge_map() if type == 'edge' else x.cell_map())
        names = list(self.columns)
        dfs = []
        for i,r in outputs.iterrows():
            df = r['output']
            # Label every map row with its frame's key.  from_tuples with
            # explicit names also copes with a frame whose map has no rows.
            df.index = pd.MultiIndex.from_tuples([i]*df.shape[0],names=names)
            dfs.append(df)
        dfs = pd.concat(dfs).reset_index()
        # we don't need every cell.. just the ones in this cell data frame
        dfs = dfs.merge(self.cdf[list(self.cdf.frame_columns)+['cell_index']],on=list(self.cdf.frame_columns)+['cell_index'])
        if type == 'edge': self._edge_map_cache = dfs
        else: self._cell_map_cache = dfs
        if self.verbose: sys.stderr.write("The "+str(type)+" map is finished.\n")
        return dfs

    def apply_frames(self,func):
        """
        Call func on every frame object and attach the results

        Args:
            func (callable): takes the frame returned by ``sample.get_frame(frame_id)``

        Returns:
            pd.DataFrame: indexed by all of this table's columns, with the
                result of func in an 'output' column
        """
        samples = self['sample_id'].unique().tolist()
        data = []
        for sample_id in samples:
            s = self.cdf.db.get_sample(sample_id)
            if self.verbose: sys.stderr.write("Read in sample "+s.sample_name+" ("+str(sample_id)+")\n")
            for frame_id in self.loc[self['sample_id']==sample_id,'frame_id']:
                f = s.get_frame(frame_id)
                data.append([sample_id,frame_id,func(f)])
        results = pd.DataFrame(data,columns=['sample_id','frame_id','output'])
        return self.merge(results,on=['sample_id','frame_id']).set_index(list(self.columns))

def _merge_images(image1,image2):
    """
    Paste image1 on top of image2 and return the composite as an array

    Both inputs are cast to uint8 and round-tripped through in-memory TIFFs so
    they can be opened with PIL.  image1 is then pasted at the origin of image2
    using image1 itself as the paste mask.

    Args:
        image1: array for the upper layer
        image2: array for the lower layer

    Returns:
        np.ndarray: the lower layer after pasting
    """
    def _open_as_pil(arr):
        # imageio writes the TIFF into memory, PIL reads it back
        buffer = io.BytesIO()
        imageio.imwrite(buffer, arr,format='tif')
        return Image.open(buffer)

    upper_arr = np.uint8(image1)
    lower_arr = np.uint8(image2)
    upper = _open_as_pil(upper_arr)
    lower = _open_as_pil(lower_arr)
    lower.paste(upper, (0, 0), upper)
    return np.array(lower)


def _get_new_regions(cdf,sample,frame_id,unset_label='undefined',gaussian_sigma=66,verbose=False):
    """
    Split one frame into regions, one per phenotype, by blurred cell density

    For every phenotype a 0/1 image of its cell positions is blurred with
    ``gaussian_filter``.  A pixel belongs to a phenotype when that phenotype's
    blurred value is strictly greater than every other phenotype's and the
    pixel is set in the frame's ``processed_image``.  Whatever remains inside
    ``processed_image`` is assigned to unset_label.

    Args:
        cdf: cell dataframe; rows of the frame supply 'frame_shape',
            'sample_id', 'frame_id', 'phenotype_label', 'x' and 'y'
        sample: unused, kept so existing callers keep working
        frame_id: the frame to process
        unset_label (str): key for the pixels no phenotype wins
        gaussian_sigma: sigma passed to gaussian_filter
        verbose (bool): unused, kept so existing callers keep working

    Returns:
        dict: label -> uint8 mask of the frame's shape
    """
    sub = cdf.loc[cdf['frame_id']==frame_id]
    first = sub.iloc[0]
    shape = first['frame_shape']
    proc = cdf.db.get_sample(first['sample_id']).get_frame(first['frame_id']).processed_image
    present = sub['phenotype_label'].unique()

    # One row per pixel as (y,x); it only depends on shape so build it once
    grid = pd.DataFrame(np.zeros(shape)).stack().reset_index().astype(int)
    grid.columns = ['y','x','id']
    grid = grid.drop(columns='id')

    dfs = {}
    for p in sub.phenotypes:
        if p not in present:
            # no cells of this phenotype in the frame: flat zero density
            dfs[p] = np.zeros(shape).astype(float)
            continue
        sel = sub.subset(SL(phenotypes=[p]))[['x','y']].drop_duplicates()
        sel['id'] = 1
        # 1 where a cell of this phenotype sits, 0 elsewhere
        sel = grid.merge(sel,on=['x','y'],how='left').fillna(0).pivot(columns='x',index='y',values='id')
        dfs[p] = gaussian_filter(np.array(sel).astype(float),gaussian_sigma)

    regions = {}
    remainder = np.ones(shape).astype(bool)
    for p in dfs:
        # Start from the processed area so the region is confined to it even
        # when there is no other phenotype to compare against
        result = np.ones(dfs[p].shape).astype(bool)&proc
        for o in dfs:
            if o == p: continue
            result = (dfs[p] > dfs[o])&result&proc
        result = result.astype(np.uint8)
        remainder = remainder&(~result.astype(bool))
        regions[p] = result
    regions[unset_label] = (remainder&proc).astype(np.uint8)
    return regions

def phenotypes_to_regions(cdf,path,
                          gaussian_sigma=66,
                          verbose=False,
                          overwrite=False,
                          unset_label='undefined',
                          project_name='region-refactor'):
    """
    Redefine the regions of every frame from its phenotypes and relabel the cells

    For each frame the regions come from ``_get_new_regions``; they are set on
    the frame with ``set_regions`` and each sample is appended to a new storage
    object of the same class as ``cdf.db`` opened at path.

    Args:
        cdf: cell dataframe with its ``db`` storage object set
        path: location handed to the new storage object
        gaussian_sigma: forwarded to _get_new_regions
        verbose (bool): progress messages go to stderr if True
        overwrite (bool): must be True if path already exists
        unset_label (str): forwarded to _get_new_regions
        project_name (str): assigned to the new storage object

    Returns:
        tuple: (copy of cdf with 'region_label' and 'regions' filled in and cells
            without a region dropped, the new storage object)

    Raises:
        ValueError: if path exists and overwrite is False, or if cdf.db is None
    """
    path_in_use = os.path.exists(path)
    if path_in_use and not overwrite:
        raise ValueError("cannot overwrite unless overwrite is True")
    if cdf.db is None:
        raise ValueError("You need the storage object set for this function")

    output = cdf.db.__class__(path,mode='w')
    output.project_name = project_name
    masks_by_frame = {}
    ocdf = cdf.copy()

    for sample_id in cdf['sample_id'].unique():
        sample = cdf.db.get_sample(sample_id)
        if verbose:
            sys.stderr.write("==========\nSample: "+sample.sample_name+"\n")
        in_sample = cdf['sample_id']==sample_id
        for frame_id in cdf.loc[in_sample,'frame_id'].unique():
            frame_masks = _get_new_regions(cdf,sample,frame_id,
                                           verbose=verbose,
                                           gaussian_sigma=gaussian_sigma,
                                           unset_label=unset_label)
            masks_by_frame[frame_id] = frame_masks
            frame = sample.get_frame(frame_id)
            # NOTE(review): the label here is a fixed 'undefined2', not the
            # unset_label argument -- looks deliberate, confirm
            frame.set_regions(frame_masks,
                              use_processed_region=True,
                              unset_label='undefined2',
                              verbose=verbose)
            if verbose:
                sys.stderr.write("    "+frame.frame_name+"\n")
        output.append_sample(sample)

    def _label_for_cell(row):
        # first label whose mask is 1 at the cell's pixel, NaN if none is
        masks = masks_by_frame[row['frame_id']]
        col, line = row['x'], row['y']
        for label in masks:
            if masks[label][line][col] == 1:
                return label
        return np.nan

    # Now update the cell dataframe
    if verbose:
        sys.stderr.write("update phenotype_labels\n")
    ocdf['region_label'] = ocdf.apply(_label_for_cell,1)
    has_region = ~ocdf['region_label'].isna()
    ocdf = ocdf.loc[has_region].copy()

    # pixel count of every mask, per frame
    region_sizes = {
        fid: {label: int(mask.astype(int).sum().sum()) for label, mask in masks.items()}
        for fid, masks in masks_by_frame.items()
    }
    ocdf['regions'] = ocdf.apply(lambda row: region_sizes[row['frame_id']],1)
    return ocdf, output
def get_region_images(cdf,output_path,colors,background_color='#000000',overwrite=False,format='png',verbose=False):
    """
    Write one colored region image per frame under output_path/project/sample

    Args:
        cdf: cell dataframe with its ``db`` storage object set
        output_path (str): root directory for the images
        colors (dict): region label -> hex color string such as '#ff0000'
        background_color (str): hex color the image starts from
        overwrite (bool): must be True if output_path already exists
        format (str): file extension and imageio format
        verbose (bool): print each (project, sample) to stderr if True

    Raises:
        ValueError: if cdf.db is not set, or output_path exists and overwrite is False
    """
    def _hex_to_unit_rgba(hexcode):
        # '#rrggbb' -> [r, g, b, a] scaled to 0..1 with alpha fixed at 1
        digits = hexcode.lstrip('#')
        channels = [int(digits[pos:pos+2], 16) for pos in (0, 2, 4)]
        channels.append(255)
        return [channel/255 for channel in tuple(channels)]

    def _write_frame(frame,basedir,colors,background_color,format):
        # the first region's image gives the canvas size
        first_image_id = frame.get_data('regions').iloc[0]['image_id']
        canvas_shape = frame.get_image(first_image_id).shape
        canvas = np.zeros(list(canvas_shape)+[4])
        canvas[:,:] = _hex_to_unit_rgba(background_color)
        frame_name = frame.frame_name
        for _, region in frame.get_data('regions').iterrows():
            hexcode = colors[region['region_label']]
            mask = frame.get_image(region['image_id'])
            canvas[mask==1] = _hex_to_unit_rgba(hexcode)
        target = os.path.join(basedir,frame_name+'.'+format)
        imageio.imwrite(target, canvas,format=format)

    if not cdf.db:
        raise ValueError("Need db set")
    if os.path.exists(output_path) and not overwrite:
        raise ValueError("overwrite is set to False")

    values = cdf.loc[:,['project_name','sample_id','frame_name','frame_id']].drop_duplicates()
    for pname in values['project_name'].unique():
        in_project = values.loc[values['project_name']==pname]
        for sid in in_project['sample_id'].unique():
            sample = cdf.db.get_sample(sid)
            sname = sample.sample_name
            if verbose:
                sys.stderr.write(str((pname,sname))+"\n")
            basedir = os.path.join(output_path,pname,sname)
            if not os.path.exists(basedir):
                os.makedirs(basedir)
            in_sample = in_project.loc[in_project['sample_id']==sid]
            for fid in in_sample['frame_id'].unique():
                _write_frame(sample.get_frame(fid),basedir,colors,background_color,format)