forked from jfnavarro/st_analysis
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathjson_to_matrix.py
More file actions
81 lines (70 loc) · 2.32 KB
/
Copy pathjson_to_matrix.py
File metadata and controls
81 lines (70 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#! /usr/bin/env python
"""
Script that takes a ST-data file in JSON
format from the ST Pipeline and converts it
to a data frame (genes as columns and spots as rows).
The JSON format must be like this :
[
{
"y": 25,
"x": 31,
"hits": 1,
"barcode": "GATCGCTGAAAGGATAGA",
"gene": "ENSMUSG00000041378"
},
{
"y": 23,
"x": 13,
"hits": 4,
"barcode": "TGTTCCGATGGGAGAAGC",
"gene": "ENSMUSG00000001227"
},
....
The output data frame will be like:
        gene    gene    ...
XxY     count   count
XxY     count   count
...
@Author Jose Fernandez Navarro <jose.fernandez.navarro@scilifelab.se>
"""
import argparse
import sys
import os
from collections import defaultdict
import pandas as pd
import json
def main(json_file, outfile):
    """Convert a ST Pipeline JSON file into a spots-by-genes count table.

    The input must be a JSON array of records, each providing the keys
    "x", "y", "gene" and "hits". Records are grouped by spot (named "XxY")
    and written as a tab-separated table with one row per spot and one
    column per gene. Genes that were not seen in a spot are written as 0.

    Args:
        json_file: Path to the input file; it must exist and its name
            must end in ".json".
        outfile: Path of the table to write. A falsy value (None or "")
            selects "data_table.tsv" in the current working directory.

    Exits the process with status 1, after writing a message to stderr,
    when the input file is missing or does not have a ".json" extension.
    A record lacking one of the required keys raises KeyError.
    """
    if not os.path.isfile(json_file) or not json_file.endswith(".json"):
        sys.stderr.write("Error, input file not present or invalid format\n")
        sys.exit(1)

    if not outfile:
        outfile = "data_table.tsv"

    # Iterate the JSON file to get the counts: spot name -> {gene -> count}
    genes_spot_counts = defaultdict(dict)
    with open(json_file, "r") as fh:
        for record in json.load(fh):
            gene = record["gene"]
            count = record["hits"]
            spot = "{0}x{1}".format(record["x"], record["y"])
            # A repeated (spot, gene) pair keeps the last count seen
            genes_spot_counts[spot][gene] = count

    # Row names (spots) and, in the same order, the gene->count mapping of
    # each row. keys()/values() exist on both Python 2 and 3, unlike the
    # Python 2 only iteritems().
    list_indexes = list(genes_spot_counts.keys())
    list_row_values = list(genes_spot_counts.values())

    # Create a data frame (genes as columns, spots as rows); a gene missing
    # from a spot becomes NaN here
    counts_table = pd.DataFrame(list_row_values, index=list_indexes)

    # Write table to a file, replacing the missing (NaN) entries with 0
    counts_table.to_csv(outfile, sep="\t", na_rep="0")
if __name__ == "__main__":
    # Command line entry point: the module docstring doubles as the help
    # text, so it is rendered verbatim (raw formatter keeps its layout).
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Mandatory input: the ST Pipeline JSON file to convert
    cli.add_argument(
        "--json-file",
        required=True,
        help="ST data file in JSON format",
    )
    # Optional output path; main() picks its own default when it is omitted
    cli.add_argument(
        "--outfile",
        default=None,
        help="Name of the output file",
    )
    options = cli.parse_args()
    main(options.json_file, options.outfile)