Skip to content Skip to sidebar Skip to footer

Creating Multiple Nested Dictionaries From .txt File

I am trying to create a dictionary consisting of multiple dictionaries. I am creating this from a .txt file: chrY 6 8 + chrY 3 5 + chrX 10 11 + chrX 13 15 - My desired output wou

Solution 1:

You could simplify your code quite a bit by using a nested defaultdict in which the values at the third level are lists:

from collections import defaultdict

# Three-level mapping: chromosome -> strand -> {'start': [...], 'end': [...]}.
# Every missing level is created on first access, so no membership checks
# are needed while filling it.
result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

with open('test.txt') as f:
    for row in f:
        # Each line is expected to hold exactly four whitespace-separated
        # fields; any other count makes the unpacking raise ValueError.
        ch, start, end, op = row.split()
        # Coordinates are stored as the strings read from the file.
        result[ch][op]['start'].append(start)
        result[ch][op]['end'].append(end)

import json
# defaultdict is a dict subclass, so json can serialize it directly.
print(json.dumps(result, indent=4))

Output:

{"chrY":{"+":{"start":["6","3"],"end":["8","5"]}},"chrX":{"+":{"start":["10"],"end":["11"]},"-":{"start":["13"],"end":["15"]}}}

Solution 2:

One way is to use defaultdict. For example:

import sys
from pprint import  pprint
from collections import defaultdict

# Module-level result filled by main():
# chromosome -> strand -> {'start': [...], 'end': [...]}.
first_dict = defaultdict(dict)
# The two names below are not used by main(); they are kept so that any
# existing reference to them still resolves.
intron_dict = {}

d = dict()


def main(path='test.csv'):
    """Group interval coordinates from a file by chromosome and strand.

    Each line of the file must contain exactly four whitespace-separated
    fields: chromosome, start, end and strand. The start and end values are
    appended, as strings, to ``first_dict[chromosome][strand]['start']`` and
    ``first_dict[chromosome][strand]['end']``. The filled mapping is then
    pretty-printed.

    Args:
        path: File to read. Defaults to ``'test.csv'``.

    Raises:
        ValueError: If a line does not split into exactly four fields.
    """
    with open(path, 'r') as intron:
        # Iterate the file object directly instead of loading every line
        # into a list first.
        for line in intron:
            chromosome, start, end, strand = line.split()

            # first_dict creates the per-chromosome dict on access; only the
            # per-strand level has to be created explicitly.
            if strand not in first_dict[chromosome]:
                first_dict[chromosome][strand] = defaultdict(list)

            first_dict[chromosome][strand]['start'].append(start)
            first_dict[chromosome][strand]['end'].append(end)

    pprint(first_dict)

if __name__ == '__main__':
    main()

Results in:

defaultdict(<class 'dict'>,
            {'chrX': {'+': defaultdict(<class 'list'>,
                                       {'end': ['11'],
                                        'start': ['10']}),
                      '-': defaultdict(<class 'list'>,
                                       {'end': ['15'],
                                        'start': ['13']})},
             'chrY': {'+': defaultdict(<class 'list'>,
                                       {'end': ['8', '5'],
                                        'start': ['6', '3']})}})

Solution 3:

Here is another method that does not use defaultdict; it relies only on plain if ... else checks:

import sys
# Module-level result filled by main():
# chromosome -> strand -> {'start': [...], 'end': [...]}.
intron_dict = dict()
def main():
    """Group interval coordinates by chromosome and strand using plain dicts.

    Reads the file named by ``sys.argv[1]``. Each line is split on whitespace
    and its first four fields are taken as chromosome, start, end and strand.
    The start and end values are converted to ``int`` and collected in
    ``intron_dict[chromosome][strand]['start']`` and ``[...]['end']``. The
    filled mapping is printed once all lines have been read.

    Raises:
        IndexError: If no file argument was given or a line has fewer than
            four fields.
        ValueError: If a start or end field is not an integer.
    """
    with open(sys.argv[1], 'r') as intron:
        # Iterate the file object directly instead of loading every line
        # into a list first.
        for line in intron:
            line = line.split()
            chromosome = line[0]
            start = int(line[1]) # converted to int to avoid quotes in result
            end = int(line[2])
            strand = line[3]
            # Entry to insert when this chromosome or strand is seen for the
            # first time.
            first_dict = {strand : {'start' : [start], 'end' : [end]}}

            # dict.has_key() was removed in Python 3; the `in` operator works
            # on both Python 2 and Python 3.
            if chromosome in intron_dict:
                if strand in intron_dict[chromosome]:
                    intron_dict[chromosome][strand]['start'].append(start)
                    intron_dict[chromosome][strand]['end'].append(end)
                else:
                    intron_dict[chromosome][strand] = first_dict[strand]
            else:
                intron_dict[chromosome] = first_dict

        print(intron_dict)

if __name__ == '__main__':
    main()

Output:

{'chrY': {'+': {'start': [6, 3], 'end': [8, 5]}}, 'chrX': {'+': {'start': [10], 'end': [11]}, '-': {'start': [13], 'end': [15]}}}

Post a Comment for "Creating Multiple Nested Dictionaries From .txt File"