Project management of NTIS P1 Cybernetic Systems and Department of Cybernetics | WiKKY

Project

General

Profile

Task #3941 » asflight.py

Hanzlíček Zdeněk, 16.06.2016 16:11

 
1
#!/usr/bin/python
2
# -*- coding: utf-8 -*-
3

    
4
# --------------------------------------------------------------- #
5
#    Library for a simple processing of ASF files.                #
6
# --------------------------------------------------------------- #
7
#    2011 - 2016  Zdenek Hanzlicek (zhanzlic@ntis.zcu.cz)         #
8
#                 NTIS, University of West Bohemia                #
9
# --------------------------------------------------------------- #
10

    
11
# SVN $Id: asflight.py 1756 2016-04-15 13:39:57Z zhanzlic $
12

    
13

    
14
import codecs
15
import os
16
import os.path
17

    
18

    
19
class AsfLight:
    """Minimal in-memory representation of an ASF file.

    The text format handled by :meth:`read` and :meth:`write` consists of

    * comment / header lines starting with ``#``,
    * one attribute list ``[ name1 | name2 | ... ]``,
    * utterance headers ``"utterance_name"``,
    * unit lines ``| value1 | value2 | ... |`` belonging to the most
      recently opened utterance.

    Attributes:
        utts: dict mapping an utterance name to its list of units; each
            unit is a dict mapping an attribute name to its value.
        attrib_order: list of attribute names in file (column) order.
        header: list of comment lines (kept verbatim, including ``#``).

    The object behaves like a mapping of utterance names to unit lists
    (``len``, iteration over names, ``in``, item get / set).
    """

    def __init__( self, file_name=None, code_page='utf-8' ):
        """Create an empty container and optionally load *file_name*.

        Args:
            file_name: path of an ASF file to read, or None for an empty
                object.
            code_page: text encoding used to decode *file_name*.
        """
        self.utts = dict()  # particular utterances
        self.attrib_order = list()  # order of attributes for all utterances
        self.header = list()  # list of lines with comments

        if file_name is not None:
            # The encoding must be forwarded, otherwise it is silently ignored.
            self.read( file_name, code_page )


    ## ==========----------


    def __len__( self ):
        """Return the number of utterances."""
        return len( self.utts )


    ## ==========----------


    def __iter__( self ):
        """Iterate over utterance names."""
        return iter( self.utts )


    ## ==========----------


    def __setitem__( self, key, value ):
        """Store the list of units *value* under the utterance name *key*."""
        self.utts[ key ] = value


    ## ==========----------


    def __getitem__( self, key ):
        """Return the list of units of utterance *key* (KeyError if missing)."""
        return self.utts[ key ]


    ## ==========----------


    def __contains__( self, item ):
        """Return True if an utterance named *item* is present."""
        return item in self.utts


    ## ==========----------


    def read( self, file_name, code_page='utf-8' ):
        """Load an ASF file into this object.

        Data are added to the current content: header lines are appended,
        utterances with an already known name are replaced and the attribute
        list is overwritten by the one found in the file. All values are
        stored as whitespace-stripped text. Lines matching none of the known
        line types are ignored.

        Args:
            file_name: path of the file to read.
            code_page: text encoding of the file.

        Raises:
            ValueError: a unit line occurs before any utterance header.
            IndexError: a unit line has fewer values than there are
                attribute names (surplus values are dropped).
        """
        # Plain 'r' is used on purpose: codecs.open() appends 'b' itself and
        # a mode such as 'rtb' is rejected by Python 3.
        with codecs.open( file_name, 'r', code_page ) as asf_handle:
            asf_content = asf_handle.readlines()

        utt_units = None  # units of the utterance currently being read

        for asf_line in asf_content:
            asf_line = asf_line.strip()

            if not asf_line:  # empty line
                continue

            if asf_line.startswith("#"):  # header / comment
                self.header.append( asf_line )

            elif asf_line.startswith('"') and asf_line.endswith('"'):  # new utterance
                utt_units = list()
                self.utts[ asf_line[1:-1] ] = utt_units

            elif asf_line.startswith("|") and asf_line.endswith('|'):  # unit
                if utt_units is None:
                    raise ValueError( "Unit line found before any utterance name: " + repr( asf_line ) )

                attrib_vals = [ attrib_val.strip() for attrib_val in asf_line[1:-1].split("|") ]
                utt_units.append( { attrib_name:attrib_vals[ idx ] for idx, attrib_name in enumerate( self.attrib_order ) } )

            elif asf_line.startswith("[") and asf_line.endswith("]"):  # list of attribute names
                self.attrib_order = [ attrib_name.strip() for attrib_name in asf_line[1:-1].split("|") ]


    ## ==========----------


    def write( self, file_name, code_page='utf-8' ):
        """Write the content as an ASF file with aligned columns.

        The header (if any) comes first, then the attribute list and then
        the utterances sorted by name. Each column is padded with spaces to
        the width of its longest value (or its name). Values that are not
        text are converted to text. Only attributes listed in
        ``self.attrib_order`` are written; other keys of a unit are ignored.

        The complete text is prepared before the file is opened, so a
        failure while formatting does not truncate an existing file.

        Args:
            file_name: path of the file to (over)write.
            code_page: text encoding of the file.

        Raises:
            KeyError: a unit lacks an attribute listed in ``attrib_order``.
        """
        try:
            text_type = unicode  # Python 2
        except NameError:
            text_type = str  # Python 3

        attrib_names = self.attrib_order
        attrib_lens = { attrib_name:len( attrib_name ) for attrib_name in attrib_names }

        # convert all values to text and get the maximum lengths for particular attributes
        utt_rows = list()
        for utt_name in sorted( self.utts ):
            rows = list()
            for unit in self.utts[ utt_name ]:
                attrib_vals = list()
                for attrib_name in attrib_names:
                    attrib_val = unit[ attrib_name ]
                    if not isinstance( attrib_val, text_type ):
                        attrib_val = text_type( attrib_val )
                    if attrib_lens[ attrib_name ] < len( attrib_val ):
                        attrib_lens[ attrib_name ] = len( attrib_val )
                    attrib_vals.append( attrib_val )
                rows.append( attrib_vals )
            utt_rows.append( ( utt_name, rows ) )

        chunks = list()

        if self.header:
            chunks.append( "\n".join( self.header ) )
            chunks.append( "\n\n" )

        # list of attribute names
        chunks.append( "[ " + " | ".join( [ attrib_name.ljust( attrib_lens[ attrib_name ] ) for attrib_name in attrib_names ] ) + " ]\n\n" )

        for utt_name, rows in utt_rows:
            chunks.append( '"' + utt_name + '"\n' )

            for attrib_vals in rows:
                chunks.append( "| " + " | ".join( [ attrib_val.ljust( attrib_lens[ attrib_name ] ) for attrib_name, attrib_val in zip( attrib_names, attrib_vals ) ] ) + " |\n" )

            chunks.append( "\n" )

        # Plain 'w': codecs.open() forces binary mode itself ('wtb' fails on Python 3).
        with codecs.open( file_name, 'w', code_page ) as asf_handle:
            asf_handle.write( "".join( chunks ) )
137

    
(1-1/2)