@alexruperez
Last active November 29, 2018 16:32
Create your own Siri in Swift | Lil ‘Bits | https://www.youtube.com/watch?v=Sigl3dihEB8
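Note: to run this, the app's Info.plist needs NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription entries, otherwise iOS terminates the app when authorization is requested. The snippet targets the Swift 4-era Speech and AVFoundation APIs that were current when the gist was written.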
import UIKit
import Speech
import AVFoundation // AVAudioEngine, AVAudioSession and AVSpeechSynthesizer live here

class SiriViewController: UIViewController {

    // Spanish (Spain) locale, used for both recognition and synthesis.
    private static let locale = Locale(identifier: "es-ES")

    private let speechRecognizer = SFSpeechRecognizer(locale: SiriViewController.locale)!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
    // Kept as a property: a locally scoped AVSpeechSynthesizer can be
    // deallocated before it finishes speaking.
    private let speechSynthesizer = AVSpeechSynthesizer()

    @IBOutlet var label: UILabel!
    @IBOutlet var recordButton: UIButton!
    override public func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        // Ask for speech recognition permission and start listening as soon as it is granted.
        SFSpeechRecognizer.requestAuthorization { status in
            if status == .authorized {
                OperationQueue.main.addOperation {
                    self.recordButtonTapped()
                }
            }
        }
    }
    private func startRecording() throws {
        // Cancel any in-flight task before starting a new one.
        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }

        try setAudioSessionCategory(AVAudioSessionCategoryRecord)

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            return
        }
        recognitionRequest.shouldReportPartialResults = true
        recognitionRequest.taskHint = .dictation

        let inputNode = audioEngine.inputNode
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            var transcription = ""
            if let result = result {
                isFinal = result.isFinal
                transcription = result.bestTranscription.formattedString
                // Show the (partial) transcription, colored and sized by the tagger.
                self.label.attributedText = self.tag(transcription)
            }
            if error != nil || isFinal {
                // Tear everything down and speak the final transcription back.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                try? self.read(transcription)
                self.recordButton.setTitle("¡Oye Siri!", for: []) // "Hey Siri!"
            }
        }

        // Feed microphone buffers into the recognition request.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
        label.text = "Cuéntame..." // "Tell me..."
    }
    // Note: the AVAudioSessionCategory*/AVAudioSessionMode* string constants used
    // here are the pre-Swift 4.2 spelling; from Swift 4.2 on they become
    // AVAudioSession.Category/.Mode values.
    func setAudioSessionCategory(_ category: String) throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(category)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    }
    @IBAction func recordButtonTapped() {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()
        } else {
            try? startRecording()
            recordButton.setTitle("¿Qué he dicho?", for: []) // "What did I say?"
        }
    }
    // Tags every token with its lemma and lexical class, counts occurrences,
    // and returns an attributed string colored and sized accordingly.
    func tag(_ text: String) -> NSAttributedString {
        let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .nameTypeOrLexicalClass], options: 0)
        tagger.string = text
        let range = NSRange(location: 0, length: text.utf16.count)
        var words = [String]()
        var bagOfWords = [String: CGFloat]()
        var lemmas = [String: String]()
        tagger.enumerateTags(in: range, unit: .word, scheme: .lemma, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            words.append(word)
            lemmas[word] = tag?.rawValue
            bagOfWords[word, default: 0] += 1
        }
        var colors = [String: UIColor]()
        tagger.enumerateTags(in: range, unit: .word, scheme: .nameTypeOrLexicalClass, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            colors[word] = tag?.colorValue
        }
        return compose(words, bagOfWords: bagOfWords, lemmas: lemmas, colors: colors)
    }
    // Rebuilds the sentence: each word is replaced by its lemma (when known),
    // sized by frequency and colored by lexical class.
    private func compose(_ words: [String], bagOfWords: [String: CGFloat], lemmas: [String: String], colors: [String: UIColor]) -> NSAttributedString {
        let result = NSMutableAttributedString()
        words.forEach { word in
            let fontSize = 15 + (bagOfWords[word] ?? 0)
            let attributedWord = NSAttributedString(string: lemmas[word] ?? word,
                                                    attributes: [.foregroundColor: colors[word] ?? .black,
                                                                 .font: UIFont.systemFont(ofSize: fontSize)])
            result.append(attributedWord)
        }
        return result
    }
    func read(_ text: String) throws {
        try setAudioSessionCategory(AVAudioSessionCategoryPlayback)
        let speechUtterance = AVSpeechUtterance(string: text)
        speechUtterance.voice = AVSpeechSynthesisVoice(language: SiriViewController.locale.identifier)
        // Uses the retained synthesizer property; a local instance could be
        // deallocated before the utterance finishes.
        speechSynthesizer.speak(speechUtterance)
    }
}
extension NSLinguisticTag {
    public var colorValue: UIColor {
        switch self {
        case .noun: return .red
        case .verb: return .green
        case .adjective: return .blue
        case .adverb: return .cyan
        case .pronoun: return .yellow
        case .determiner: return .magenta
        case .particle: return .gray
        case .preposition: return .darkGray
        case .number: return .lightGray
        case .personalName: return .orange
        case .placeName: return .purple
        case .organizationName: return .brown
        default: return .black
        }
    }
}
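A quick way to sanity-check the tagging pass outside the app, e.g. in a playground: the sketch below (not part of the original gist; the sample sentence is made up) runs the same .nameTypeOrLexicalClass enumeration that tag(_:) uses and prints each token's tag instead of coloring it.

import Foundation

let text = "Siri enciende la luz del salón"
let tagger = NSLinguisticTagger(tagSchemes: [.nameTypeOrLexicalClass], options: 0)
tagger.string = text
let range = NSRange(location: 0, length: text.utf16.count)
tagger.enumerateTags(in: range, unit: .word, scheme: .nameTypeOrLexicalClass,
                     options: [.omitWhitespace, .omitPunctuation]) { tag, tokenRange, _ in
    let word = (text as NSString).substring(with: tokenRange)
    print("\(word) -> \(tag?.rawValue ?? "?")") // e.g. "Siri -> PersonalName" (actual tags depend on the model)
}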