# Run speech-to-text and keep the per-character metadata with the transcript.
transcription = ds.sttWithMetadata(audio, fs)
print(metadata_to_string(transcription))
print(transcription.num_items)

# Only inspect the first item when the transcription is non-empty.
if transcription.num_items > 0:
    first_item = transcription.items[0]
    print(first_item.character)
    print(first_item.timestep)
    print(first_item.start_time)

# The overall score is exposed as `confidence` in newer DeepSpeech releases,
# but the v0.5.x bindings name the same field `probability`; reading
# `confidence` there raises AttributeError. Use whichever one exists.
if hasattr(transcription, "confidence"):
    print(transcription.confidence)
else:
    print(transcription.probability)
the moon santiago a seminarian in eeeeeeeeeeeeeeeeelllllll nineteen o nine in a eeeeeeeeeeeeeeeeelllllll nineteen o nine eeeeeeeeeeeeeeeeelllllll eeeeeeeeeeeeeeeeelllllll eeeeeeeeeeeeeeeeelllllll
195
t
15
0.29999998211860657
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-27-1b7a0387587d> in <module>
4 print(transcription.items[0].timestep)
5 print(transcription.items[0].start_time)
----> 6 print(transcription.confidence)
AttributeError: 'impl.Metadata' object has no attribute 'confidence'
/**
 * @brief One character of a transcription together with its timing.
 */
typedef struct MetadataItem {
  /** Character emitted for the transcription. */
  char *character;

  /** Where the character occurs, counted in 20ms units. */
  int timestep;

  /** Where the character occurs, expressed in seconds. */
  float start_time;
} MetadataItem;
/**
 * @brief Holds the complete CTC output as an array of per-character
 *        metadata entries.
 */
typedef struct Metadata {
  /** Array of per-character entries. */
  MetadataItem *items;

  /** Number of entries in the items array. */
  int num_items;

  /**
   * Approximate confidence score for this transcription: roughly the sum of
   * the acoustic model logit values over every timestep/character that
   * contributed to producing it.
   */
  double confidence;
} Metadata;
@Abhinav Could you share the version you are working with? The `confidence`
field was named differently in previous releases.
TensorFlow: v1.13.1-10-g3e0cc53
DeepSpeech: v0.5.1-0-g4b29b78
So you should check the v0.5.1 documentation, where this field is named `probability` rather than `confidence`: https://github.com/mozilla/DeepSpeech/blob/v0.5.1/native_client/deepspeech.h#L19-L31