a b/singlecellmultiomics/pyutils/pyutils.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
import math
4
import subprocess
5
import numpy as np
6
7
8
def sorted_slice(a, l, r):
    """Return the indices of the entries of sorted *a* lying in ``[l, r]``.

    Args:
        a: 1-D array-like sorted in ascending order (``np.searchsorted``
            requires sorted input; the order is not checked here).
        l: Lower bound of the value range (inclusive).
        r: Upper bound of the value range (inclusive).

    Returns:
        ``np.ndarray`` of consecutive integer indices ``i`` for which
        ``l <= a[i] <= r``; empty when no entry falls inside the range.
    """
    # 'left' -> first index whose value is >= l (keeps values equal to l).
    start = np.searchsorted(a, l, 'left')
    # 'right' -> first index whose value is > r (keeps values equal to r).
    end = np.searchsorted(a, r, 'right')
    return np.arange(start, end)
12
13
def meanOfCounter(counter):
    """Return the count-weighted mean of the values stored in *counter*.

    Args:
        counter: Mapping of ``number -> count`` (e.g. a
            ``collections.Counter``); only ``.items()`` is used, so a plain
            ``dict`` works as well.

    Returns:
        ``sum(number * count) / sum(count)``, or ``0`` when the total count
        is zero (for instance for an empty counter).
    """
    weighted_total = 0
    n_observations = 0
    # Single pass over the items; the ordering that most_common() would
    # impose is irrelevant for a sum, so the sort is skipped.
    for number, count in counter.items():
        weighted_total += number * count
        n_observations += count
    if n_observations == 0:
        return 0
    return weighted_total / n_observations
21
22
23
def varianceOfCounter(counter):
    """Return the spread of the values stored in *counter*.

    NOTE: despite the name, the value returned is the *square root* of the
    population variance (i.e. the population standard deviation). That
    behaviour is kept so existing callers see the same numbers.

    Args:
        counter: Mapping of ``number -> count`` (e.g. a
            ``collections.Counter``); only ``.items()`` is used.

    Returns:
        ``sqrt(E[x^2] - E[x]^2)`` weighted by the counts, or ``0`` when the
        total count is zero (for instance for an empty counter).
    """
    weighted_total = 0
    weighted_squares = 0
    n_observations = 0
    for number, count in counter.items():
        weighted_total += number * count
        weighted_squares += number * number * count
        n_observations += count

    # Guard on the number of observations, not on the sum of the values:
    # data such as {-1: 1, 1: 1} sums to zero but is not degenerate.
    if n_observations == 0:
        return 0

    mean = weighted_total / n_observations
    variance = weighted_squares / n_observations - mean * mean
    # Round-off can push a (mathematically zero) variance slightly below
    # zero, which would make math.sqrt raise ValueError; clamp it.
    return math.sqrt(max(variance, 0.0))
38
39
40
def wccount(filename):
    """Return the number of lines in *filename* as reported by ``wc -l``.

    Args:
        filename: Path of the file to count; passed to ``wc`` as a single
            argument (no shell is involved).

    Returns:
        The line count as an ``int``.

    Raises:
        ValueError: If the output of ``wc`` does not start with an integer,
            e.g. because ``wc`` printed an error message (stderr is merged
            into the captured output).
    """
    out = subprocess.run(['wc', '-l', filename],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT).stdout
    # Split on any whitespace instead of partitioning on the first space:
    # some wc implementations left-pad the count with spaces, which would
    # leave an empty first field.
    fields = out.split(None, 1)
    try:
        return int(fields[0])
    except (IndexError, ValueError):
        raise ValueError(
            f'could not parse a line count from wc output: {out!r}') from None
46
47
48
def wccountgz(filename):
    """Return the number of lines in a gzip-compressed file.

    Runs ``zcat <filename> | wc -l`` through the shell and parses the count.
    This is best-effort: any failure to run the pipeline or to parse its
    output yields ``0`` instead of an exception.

    Args:
        filename: Path of the compressed file.

    Returns:
        The line count as an ``int``, or ``0`` when it could not be
        determined (for example because the file does not exist; stderr is
        merged into the captured output, so an error message printed ahead
        of the count makes the parse fail).
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    import shlex

    # Quote the path: the command is interpreted by a shell, so an unquoted
    # name containing spaces or metacharacters would be split or executed.
    cmd = f'zcat {shlex.quote(str(filename))} | wc -l'
    try:
        out = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             shell=True).stdout
        # First whitespace-separated field; tolerates a left-padded count.
        fields = out.split(None, 1)
        return int(fields[0]) if fields else 0
    except (OSError, ValueError, subprocess.SubprocessError):
        return 0