PySpark SparkConf() equivalent of spark command option "--jars" - java

I'd like to run a PySpark script in JupyterLab and create a custom UDF from JAR packages. To do so I need to broadcast these JAR packages to the executor nodes. This answer shows the command-line approach (passing the --jars option to spark-submit), but I'd like to know the SparkConf() equivalent. In my JupyterLab, sc.version is 3.3.0-SNAPSHOT.
I'm very new to Spark.. your help will be highly appreciated!
Code:
import findspark
findspark.init()
findspark.find()
import pyspark
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
import os
# ------------ create spark session ------------
app_name = 'PySpark_Example'
# Working directory of the notebook process; every jar path below is built from it.
path = os.getcwd()
# The application name is the JUPYTERHUB_USER environment variable (spaces
# removed) joined to app_name with "_"; the master is a hard-coded spark:// URL.
# NOTE(review): os.environ.get returns None when the variable is unset, in
# which case .replace raises AttributeError.
conf = SparkConf().setAppName(os.environ.get('JUPYTERHUB_USER').replace(" ", "") + "_" + app_name).setMaster(
'spark://spark-master-svc.spark:7077')
# Run `hostname -i` in a shell and keep the first line of its output as the
# address advertised for the driver.
command = os.popen("hostname -i")
hostname = command.read().split("\n")[0]
command.close()
conf.set("spark.scheduler.mode","FAIR")
conf.set("spark.deployMode","client")
conf.set("spark.driver.host",hostname)
conf.set('spark.extraListeners','sparkmonitor.listener.JupyterSparkMonitorListener')
# SparkConf counterpart of --jars: a comma-separated list of the two jars,
# both located in the working directory captured in `path`.
conf.set("spark.jars", "{path}/my_func.jar,{path}/javabuilder.jar".format(path=path))
# The executor class path is set to the working directory itself (trailing
# slash), not to the individual jar files.
# NOTE(review): a directory entry on a Java class path does not by itself
# include the jars inside it - confirm this setting does what is intended.
conf.set("spark.executor.extraClassPath", "{path}/".format(path=path))
sc = pyspark.SparkContext(conf=conf)
spark = SparkSession(sc)
# The same two jars are registered a second time through the JVM context
# once the session exists.
spark._jsc.addJar("{}/my_func.jar".format(path))
spark._jsc.addJar("{}/javabuilder.jar".format(path))
# ------------- create sample dataframe ---------
# Three rows of (int, float) under the column names col1 and col2.
sdf = spark.createDataFrame(
[
(1, 2.),
(2, 3.),
(3, 5.),
],
["col1", "col2"]
)
# Expose the DataFrame to SQL under the name temp_table.
sdf.createOrReplaceTempView("temp_table")
# -------------- create UDF ----------------------
# Register the SQL function my_func, implemented by class my_func.Class1 and
# loaded from my_func.jar in the working directory.
create_udf_from_jar = "CREATE OR REPLACE FUNCTION my_func AS 'my_func.Class1' " + \
"USING JAR '{}/my_func.jar'".format(path)
spark.sql(create_udf_from_jar)
spark.sql("SHOW USER FUNCTIONS").show()
# -------------- test ----------------------------
# Apply the registered function to col1 of the sample table.
spark.sql("SELECT my_func(col1) FROM temp_table").show()
Error:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
~tmp/ipykernel_4398/644670379.py in <cell line: 1>()
----> 1 spark.sql("SELECT my_func(col1) FROM temp_table").show()
~opt/spark/python/pyspark/sql/session.py in sql(self, sqlQuery, **kwargs)
1033 sqlQuery = formatter.format(sqlQuery, **kwargs)
1034 try:
-> 1035 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
1036 finally:
1037 if len(kwargs) > 0:
~opt/spark/python/lib/py4j-0.10.9.3-src.zip/py4j/java_gateway.py in __call__(self, *args)
1319
1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
1323
~opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
188 def deco(*a: Any, **kw: Any) -> Any:
189 try:
--> 190 return f(*a, **kw)
191 except Py4JJavaError as e:
192 converted = convert_exception(e.java_exception)
~opt/spark/python/lib/py4j-0.10.9.3-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o3813.sql.
: java.lang.NoClassDefFoundError: com/mathworks/toolbox/javabuilder/internal/MWComponentInstance

spark.jars is the one you're looking for (Doc)

Related

JavaObject from Netlogo has no length using py4j?

I am running nl4py (a Python module for NetLogo) in a Jupyter notebook. I am trying to import a list from NetLogo into Python, but the value arrives as a Java object. When I try to convert the JavaObject to a Python value using py4j, I get the error: JavaObject has no len(). Is there a better way to convert a JavaObject in Python? Thanks.
python 3.8, ipython 7.10.0, nl4py 0.5.0, jdk 15.0.2, Netlogo 6.0, MacOS Catalina 10.15.7
#start of code for nl4py
import nl4py
# Start the NetLogo server, open the model and run its setup procedure.
nl4py.startServer("/Applications/NetLogo 6.0/")
n = nl4py.NetLogoApp()
n.openModel('/Users/tracykuper/Desktop/Netlogo models/Mucin project/1_21_20/PA_metabolite_model_1_21.nlogo')
n.command("setup")

#run abm model for n number of times
#change patch variable under a specific turtle
# range(1) runs a single round; each round advances the model by ten `go` steps.
for _ in range(1):
    n.command("repeat 10 [go]")

#A = np.array([1,2,3,4],[3,2,-1,-6])) #turtle number, metabolite diff.
#run simulation of metabolic network to get biomass and metabolite values
#change patch variable under a specific turtle
names = ["1", "2", "3"] #turtle names
patch_values = ["-0.5", "50", "-0.5"] #metabolite values
values = ["5", "30", "5"] #biomass values
# The three lists are parallel: one entry per turtle.
for name, patch_value, value in zip(names, patch_values, values):
    x = ('ask turtle {} [ask patch-here [set succinate succinate + {}]]'.format(name, patch_value))
    n.command(x)
    #set new bacteria mass values
    y = ('ask turtle {} [set m m + {}]'.format(name, value))
    n.command(y)
    n.command("ask turtle {} [set color red]".format(name))

import py4j
mass = n.report("mass-list")
print(mass)
self = n.report("self-list")
type(mass)
# NOTE: this is the call that raises the TypeError in the traceback that
# follows. get_return_value calls len() on its first argument, and `mass`
# is a JavaObject.
s = py4j.protocol.get_return_value(mass, object)
[[0.69], [0.8], [0.73], [0.71], [0.5], [0.51], [0.54], [0.82], [0.72], [0.88]]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-31-0b04d0127b47> in <module>
11 #map(mass + mass,mass)
12
---> 13 s = py4j.protocol.get_return_value(mass, object)
~/opt/anaconda3/envs/netlogo4/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
319 (e.g., *hello* in `object1.hello()`). Optional.
320 """
--> 321 if is_error(answer)[0]:
322 if len(answer) > 1:
323 type = answer[1]
~/opt/anaconda3/envs/netlogo4/lib/python3.6/site-packages/py4j/protocol.py in is_error(answer)
372
373 def is_error(answer):
--> 374 if len(answer) == 0 or answer[0] != SUCCESS:
375 return (True, None)
376 else:
TypeError: object of type 'JavaObject' has no len()

: java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayOps scala.Predef$.refArrayOps(java.lang.Object[])'

I was trying to implement the DBSCAN model in PySpark framework. Github link of complete implementation . I just copied and run the code from 'README.md' segment. The code is
import dbscan
from sklearn.datasets import make_blobs
from pyspark.sql import types as T, SparkSession
from scipy.spatial import distance
# Build (or reuse) a Spark session that requests the graphframes package
# through spark.jars.packages and pins the driver host to 127.0.0.1.
# NOTE(review): the package coordinate ends in "spark2.3-s_2.11", which looks
# like a Spark 2.3 / Scala 2.11 build - confirm it matches the Spark and
# Scala versions actually installed.
spark = SparkSession \
.builder \
.appName("DBSCAN") \
.config("spark.jars.packages", "graphframes:graphframes:0.7.0-spark2.3-s_2.11") \
.config('spark.driver.host', '127.0.0.1') \
.getOrCreate()
# NOTE(review): `centers` is not defined anywhere in this snippet.
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=5)
# One (row index, [float, ...]) tuple per row of X.
data = [(i, [float(item) for item in X[i]]) for i in range(X.shape[0])]
# Two non-nullable columns: an integer id and an array-of-float value.
schema = T.StructType([T.StructField("id", T.IntegerType(), False),
T.StructField("value", T.ArrayType(T.FloatType()), False)])
df = spark.createDataFrame(data, schema=schema)
# This is the call shown failing in the traceback (inside createGraph).
df_clusters = dbscan.process(spark, df, .2, 10, distance.euclidean, 2, "checkpoint")
which shows me the error message as
-
**> --------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call
last)
in ()
12 T.StructField("value", T.ArrayType(T.FloatType()), False)])
13 df = spark.createDataFrame(data, schema=schema)
---> 14 df_clusters = dbscan.process(spark, df, .2, 10, distance.euclidean, 2, "checkpoint")
4 frames
/usr/local/lib/python3.6/dist-packages/py4j/protocol.py in
get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o185.createGraph. :
java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayOps
scala.Predef$.refArrayOps(java.lang.Object[])' at
org.graphframes.GraphFrame$.apply(GraphFrame.scala:676) at
org.graphframes.GraphFramePythonAPI.createGraph(GraphFramePythonAPI.scala:10)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
Method) at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566) at
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at
py4j.Gateway.invoke(Gateway.java:282) at
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79) at
py4j.GatewayConnection.run(GatewayConnection.java:238) at
java.base/java.lang.Thread.run(Thread.java:834)**
I don't know anything about Java or Scala, and I'm also a beginner with PySpark. Please help me solve this problem; I need it badly!

Could not load main class from .java class file

I'm using Python to visualize a graph through a tool named wot using jupyter notebook. It utilizes Gephi, a java-based graph utility. I try to run function to return coordinate output files as below:
def run_gephi(input_graph_file, output_coord_file, n_steps):
    """Run the GraphLayout Java program shipped with wot on a graph file.

    Args:
        input_graph_file: Path of the graph file passed to GraphLayout.
        output_coord_file: Path the Java program is asked to write to.
        n_steps: Number of layout steps; passed on as a string.

    Raises:
        subprocess.CalledProcessError: If the java process exits non-zero.
    """
    import psutil

    layout = 'fa'
    # JVM heap limit in whole gigabytes: half of the first field reported by
    # psutil.virtual_memory() (presumably total bytes - confirm).
    memory = int(0.5 * psutil.virtual_memory()[0] * 1e-9)
    # Class path = the directory expected to hold GraphLayout.class, plus the
    # Gephi toolkit jar. os.pathsep is ':' on POSIX and ';' on Windows.
    # NOTE: `java` loads GraphLayout from a compiled GraphLayout.class in
    # that directory.
    classpath = os.pathsep.join([
        os.path.dirname(
            pkg_resources.resource_filename('wot', 'commands/resources/graph_layout/GraphLayout.class')),
        pkg_resources.resource_filename('wot', 'commands/resources/graph_layout/gephi-toolkit-0.9.2-all.jar'),
    ])
    subprocess.check_call([
        'java', '-Djava.awt.headless=true', '-Xmx{memory}g'.format(memory=memory),
        '-cp', classpath,
        'GraphLayout', input_graph_file, output_coord_file, layout, str(n_steps), str(os.cpu_count()),
    ])
Then it returns following error in my jupyter notebook:
CalledProcessError Traceback (most recent call last)
<ipython-input-18-5fc832689b87> in <module>
----> 1 df, adata = compute_force_layout(ds)
<ipython-input-7-6cb84b9e0fa0> in compute_force_layout(ds, n_neighbors, n_comps, neighbors_diff, n_steps)
24 writer.write("{u} {v} {w:.6g}\n".format(u=i + 1, v=j + 1, w=W[i, j]))
25
---> 26 run_gephi(input_graph_file, output_coord_file, n_steps)
27 # replace numbers with cids
28 df = pd.read_table(output_coord_file, header=0, index_col='id')
<ipython-input-16-28772d0d10cc> in run_gephi(input_graph_file, output_coord_file, n_steps)
7 pkg_resources.resource_filename('wot', 'commands/resources/graph_layout/gephi-toolkit-0.9.2-all.jar')
8 subprocess.check_call(['java', '-Djava.awt.headless=true', '-Xmx{memory}g'.format(memory=memory), '-cp', classpath, \
----> 9 'GraphLayout', input_graph_file, output_coord_file, layout, str(n_steps), str(os.cpu_count())])
~/anaconda3/lib/python3.7/subprocess.py in check_call(*popenargs, **kwargs)
339 if cmd is None:
340 cmd = popenargs[0]
--> 341 raise CalledProcessError(retcode, cmd)
342 return 0
343
CalledProcessError: Command '['java', '-Djava.awt.headless=true', '-Xmx25g', '-cp', '/home/iik/.local/lib/python3.7/site-packages/wot/commands/resources/graph_layout:/home/iik/.local/lib/python3.7/site-packages/wot/commands/resources/graph_layout/gephi-toolkit-0.9.2-all.jar', 'GraphLayout', '/tmp/gephiznxedn32.net', '/tmp/coordsd64x05ww.txt', 'fa', '10000', '8']' returned non-zero exit status 1.
and following message was found in terminal
Error: Could not find or load main class GraphLayout
I can find the "GraphLayout.java" and "gephi-toolkit-0.9.2-all.jar" files in that path, so I really don't know why the class can't be loaded.
Do you have any suggestions?
Add *
The class GraphLayout is not contained in Gephi but defined by GraphLayout.java.

Updating module version when updating version in dependencies (multi-module maven )

My problem: versions-maven-plugin helps me to up version in some module (let's call it A) in my multi-module maven project.
Some modules in this project (let's call them B and C) have module A as a dependency, so I need to bump the versions of B and C too. Sometimes I also need to bump the version of another module (B-parent) that has B (or C) as a dependency (A version up -> B version up -> B-parent version up). Another problem is that the modules can be at different levels of nesting.
Example:
root:
---B-parent:
---B (A in dependencies)
---C-parent
---C (A in dependencies)
---A-parent:
---A
Process: A version up -> A-parent version up, C version-up -> C-parent version-up, B version-up -> B-parent version up.
This plugin can't do this.
Is there any idea how this can be done?
Or my strategy of updating versions is not good enough?
I've made a script for increasing version numbers in all dependent modules recursively with a versions-maven-plugin.
Algorithm is as follows:
Run versions:set in target module
Run versions:set in all modules which have been updated by versions:set from previous step. If the module has been already processed - skip it.
Repeat step 2
Python 2.7 code
#!/usr/bin/env python
# -*- coding: utf-8 -*- #
# How To
#
# Run script and pass module path as a first argument.
# Or run it without arguments in module dir.
#
# Script will request the new version number for each module.
# If no version provided - last digit will be incremented (1.0.0 -> 1.0.1).
# cd <module-path>
# <project-dir>/increment-version.py
# ...
# review changes and commit
from subprocess import call, Popen, PIPE, check_output
import os
import re
import sys
# Shell pipeline that evaluates project.version with the Maven help plugin,
# discards stderr and drops every output line containing '['. The backslash
# is written as an explicit "\\" so the literal has no invalid escape.
getVersionCommand = "mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate " \
                    "-Dexpression=project.version 2>/dev/null | grep -v '\\['"


def getCurrentModuleVersion():
    """Return the first line printed by getVersionCommand in the current directory.

    Raises:
        subprocess.CalledProcessError: If the pipeline exits non-zero.
    """
    return check_output(getVersionCommand, shell=True).decode("utf-8").split("\n")[0]
def incrementLastDigit(version):
    """Return *version* with the number in its last dot-separated part increased by one.

    "1.0.0" becomes "1.0.1". When the last part is a number followed by a
    qualifier, only the leading number changes and the qualifier is kept:
    "1.0.0-SNAPSHOT" becomes "1.0.1-SNAPSHOT".

    Raises:
        ValueError: If the last part does not start with a number.
    """
    digits = version.split(".")
    last = digits[-1]
    try:
        digits[-1] = str(int(last) + 1)
    except ValueError:
        # Not a plain integer: accept "<number><qualifier>" and bump the number.
        match = re.match(r"(\d+)(.*)$", last)
        if match is None:
            raise
        digits[-1] = str(int(match.group(1)) + 1) + match.group(2)
    return ".".join(digits)
def isUpdatedVersionInFile(version, file):
    """Tell whether the uncommitted diff of *file* adds a <version>version</version> tag.

    The diff against HEAD is taken with zero context lines, and only lines
    starting with "+" are searched. git is invoked with an argument list
    rather than through a shell pipeline, so a path containing spaces or
    shell metacharacters is passed through intact, and a diff without any
    added line yields False instead of a CalledProcessError from the
    filtering step.

    Args:
        version: Version string expected inside the <version> tag.
        file: Path of the file to diff, as understood by git.

    Raises:
        subprocess.CalledProcessError: If git itself fails.
    """
    needle = "<version>" + version + "</version>"
    diff = check_output(
        ["git", "diff", "HEAD", "--no-ext-diff", "--unified=0", "-a", "--no-prefix", "--", file]
    ).decode("utf-8")
    return any(line.startswith("+") and needle in line for line in diff.splitlines())
def runVersionSet(version):
    """Run `mvn versions:set` with *version* in the current directory.

    Returns:
        A tuple (output, err, exitCode). output is the captured stdout; err
        is always None because stderr is not piped; exitCode is always 0,
        since any other exit code ends the script.

    Raises:
        SystemExit: With status 1 when Maven exits non-zero.
    """
    process = Popen(
        ["mvn", "versions:set", "-DnewVersion=" + version, "-DgenerateBackupPoms=false"],
        stdout=PIPE)
    # communicate() waits for the process, so returncode is set afterwards.
    output, err = process.communicate()
    exitCode = process.returncode
    if exitCode != 0:
        print("Error setting the version")
        sys.exit(1)
    return output, err, exitCode
def addChangedPoms(version, dirsToVisit, visitedDirs):
    """Return directories of modified pom.xml files that now carry *version*.

    Modified files are taken from `git ls-files -m`. A pom.xml qualifies when
    isUpdatedVersionInFile reports that its diff adds the version tag.
    Directories already present in *visitedDirs* or *dirsToVisit* are left out.

    Args:
        version: Version that was just set.
        dirsToVisit: Directories already queued.
        visitedDirs: Directories already processed.

    Returns:
        A list of absolute directory paths.
    """
    changedFiles = check_output(["git", "ls-files", "-m"]).decode("utf-8").split("\n")
    changedDirs = []
    for f in changedFiles:
        if not f.endswith("pom.xml"):
            continue
        if not isUpdatedVersionInFile(version, f):
            continue
        d = os.path.dirname(os.path.abspath(f))
        if d not in visitedDirs and d not in dirsToVisit:
            changedDirs.append(d)
    # Single-argument print behaves the same under Python 2.7 and Python 3.
    print("New dirs to visit: {}".format(changedDirs))
    return changedDirs
if __name__ == "__main__":
    # Entry point (Python 2.7: uses raw_input). Starts from the module
    # directory given as the first argument, or the current directory, and
    # keeps visiting every module whose pom.xml was changed by the previous
    # versions:set run until none is left.
    visitedDirs = []
    dirsToVisit = []
    if len(sys.argv) > 1:
        if os.path.exists(os.path.join(sys.argv[1], "pom.xml")):
            dirsToVisit.append(os.path.abspath(sys.argv[1]))
        else:
            print("Error. No pom.xml file in dir {}".format(sys.argv[1]))
            sys.exit(1)
    else:
        dirsToVisit.append(os.path.abspath(os.getcwd()))

    # versions:set output is searched for this marker to find the directory
    # from which changed files are listed.
    pattern = re.compile("aggregation root: (.*)")
    while dirsToVisit:
        dirToVisit = dirsToVisit.pop()
        print("Visiting dir {}".format(dirToVisit))
        os.chdir(dirToVisit)
        currentVersion = getCurrentModuleVersion()
        defaultVersion = incrementLastDigit(currentVersion)
        version = raw_input("New version for {}:{} ({}):".format(dirToVisit, currentVersion, defaultVersion))
        # Empty or whitespace-only input selects the default; surrounding
        # whitespace is never passed on to Maven.
        version = version.strip() or defaultVersion
        print("New version: {}".format(version))
        output, err, exitcode = runVersionSet(version)
        rootMatch = pattern.search(output)
        if rootMatch is None:
            print("Error. No aggregation root found in the versions:set output")
            sys.exit(1)
        rootDir = rootMatch.group(1)
        visitedDirs.append(dirToVisit)
        os.chdir(rootDir)
        print("Adding new dirs to visit")
        dirsToVisit.extend(addChangedPoms(version, dirsToVisit, visitedDirs))

Error running Pythontranscriber.py

When I try to run pythontranscriber.py, it fails with the following error:
Traceback (most recent call last):
File "PythonTranscriber.py", line 14, in <module>
from edu.cmu.sphinx.decoder import Decoder
ImportError: No module named edu
My script PythonTranscriber.py:
import sys

# Directory that is expected to contain the jars listed below.
libDir = "/home/karen/sphinx4-1.0beta5-scr/sphinx4-1.0beta5/src/sphinx4/"
classPaths = [
    "sphinx4.jar",
    "jsapi.jar",
]
# Append each jar to sys.path before the Java imports that follow.
# libDir ends with "/", so plain concatenation yields a full path.
for classPath in classPaths:
    sys.path.append(libDir + classPath)

# Lower-case aliases so the boolean arguments further down read as written.
true = 1
false = 0
from edu.cmu.sphinx.decoder import Decoder
from edu.cmu.sphinx.decoder import ResultListener
from edu.cmu.sphinx.decoder.pruner import SimplePruner
from edu.cmu.sphinx.decoder.scorer import ThreadedAcousticScorer
from edu.cmu.sphinx.decoder.search import PartitionActiveListFactory
from edu.cmu.sphinx.decoder.search import SimpleBreadthFirstSearchManager
from edu.cmu.sphinx.frontend import DataBlocker
from edu.cmu.sphinx.frontend import FrontEnd
from edu.cmu.sphinx.frontend.endpoint import NonSpeechDataFilter
from edu.cmu.sphinx.frontend.endpoint import SpeechClassifier
from edu.cmu.sphinx.frontend.endpoint import SpeechMarker
from edu.cmu.sphinx.frontend.feature import DeltasFeatureExtractor
from edu.cmu.sphinx.frontend.feature import LiveCMN
from edu.cmu.sphinx.frontend.filter import Preemphasizer
from edu.cmu.sphinx.frontend.frequencywarp import MelFrequencyFilterBank
from edu.cmu.sphinx.frontend.transform import DiscreteCosineTransform
from edu.cmu.sphinx.frontend.transform import DiscreteFourierTransform
from edu.cmu.sphinx.frontend.util import AudioFileDataSource
from edu.cmu.sphinx.frontend.window import RaisedCosineWindower
from edu.cmu.sphinx.instrumentation import BestPathAccuracyTracker
from edu.cmu.sphinx.instrumentation import MemoryTracker
from edu.cmu.sphinx.instrumentation import SpeedTracker
from edu.cmu.sphinx.jsapi import JSGFGrammar
from edu.cmu.sphinx.linguist.acoustic import UnitManager
from edu.cmu.sphinx.linguist.acoustic.tiedstate import Sphinx3Loader
from edu.cmu.sphinx.linguist.acoustic.tiedstate import TiedStateAcousticModel
from edu.cmu.sphinx.linguist.dictionary import FastDictionary
from edu.cmu.sphinx.linguist.flat import FlatLinguist
from edu.cmu.sphinx.recognizer import Recognizer
from edu.cmu.sphinx.util import LogMath
from java.util.logging import Logger
from java.util.logging import Level
from java.net import URL
from java.util import ArrayList
# Leftover from the Groovy version of this script (argument check, not ported):
# if (args.length < 1) {
# throw new Error("USAGE: GroovyTranscriber <sphinx4 root> [<WAV file>]")
# }
root = "../../.."

# init common
Logger.getLogger("").setLevel(Level.WARNING)
logMath = LogMath(1.0001, true)
absoluteBeamWidth = -1
relativeBeamWidth = 1E-80
wordInsertionProbability = 1E-36
languageWeight = 8.0

# init audio data
audioSource = AudioFileDataSource(3200, None)
audioURL = URL("file:" + root + "/src/apps/edu/cmu/sphinx/demo/transcriber/10001-90210-01803.wav")
# Leftover from the Groovy version (optional WAV file argument, not ported):
# (args.length > 1) ?
# File(args[0]).toURI().toURL() :
# Restored as a statement: it had been folded into the comment above, which
# left audioURL unused and the audio source without a file.
audioSource.setAudioFile(audioURL, None)

# init front end
dataBlocker = DataBlocker(
    10  # blockSizeMs
)
# Restored as its own statement: the closing parenthesis above and this
# assignment had been folded into the blockSizeMs comment.
speechClassifier = SpeechClassifier(
    10,  # frameLengthMs,
    0.003,  # adjustment,
    10,  # threshold,
    0  # minSignal
)
# Remaining front-end stages. Each constructor takes positional arguments;
# the trailing comment on each argument gives the parameter name it fills.
speechMarker = SpeechMarker(
200, # startSpeechTime,
500, # endSilenceTime,
100, # speechLeader,
50, # speechLeaderFrames
100 # speechTrailer
)
nonSpeechDataFilter = NonSpeechDataFilter()
# The variable is spelled `premphasizer` here and in the pipeline list below.
premphasizer = Preemphasizer(
0.97 # preemphasisFactor
)
windower = RaisedCosineWindower(
0.46, # double alpha
25.625, # windowSizeInMs
10.0 # windowShiftInMs
)
fft = DiscreteFourierTransform(
-1, # numberFftPoints
false # invert
)
melFilterBank = MelFrequencyFilterBank(
130.0, # minFreq,
6800.0, # maxFreq,
40 # numberFilters
)
dct = DiscreteCosineTransform(
40, # numberMelFilters,
13 # cepstrumSize
)
cmn = LiveCMN(
12.0, # initialMean,
100, # cmnWindow,
160 # cmnShiftWindow
)
featureExtraction = DeltasFeatureExtractor(
3 # window
)
# Stages in the order they are handed to FrontEnd; the list starts with the
# audio source and ends with the feature extractor.
pipeline = [
audioSource,
dataBlocker,
speechClassifier,
speechMarker,
nonSpeechDataFilter,
premphasizer,
windower,
fft,
melFilterBank,
dct,
cmn,
featureExtraction ]
frontend = FrontEnd(pipeline)
# init models
# All model files are addressed as "file:" locations under `root`, inside
# models/acoustic/tidigits.
unitManager = UnitManager()
# Positional arguments; their parameter names are not given in this script.
modelLoader = Sphinx3Loader(
"file:" + root + "/models/acoustic/tidigits/model.props",
logMath,
unitManager,
true,
true,
39,
"file:" + root + "/models/acoustic/tidigits/wd_dependent_phone.500.mdef",
"file:" + root + "/models/acoustic/tidigits/wd_dependent_phone.cd_continuous_8gau/",
0.0,
1e-7,
0.0001,
true)
model = TiedStateAcousticModel(modelLoader, unitManager, true)
# Dictionary built from the dictionary and fillerdict files, an empty
# ArrayList, and the same unitManager used by the loader and the model.
dictionary = FastDictionary(
URL("file:" + root + "/models/acoustic/tidigits/dictionary"),
URL("file:" + root + "/models/acoustic/tidigits/fillerdict"),
ArrayList(),
false,
"<sil>",
false,
false,
unitManager)
# init linguist
# The grammar named "digits" is looked up relative to the transcriber demo
# directory and shares logMath and dictionary with the objects built earlier.
grammar = JSGFGrammar(
# URL baseURL,
URL("file:" + root + "/src/apps/edu/cmu/sphinx/demo/transcriber/"),
logMath, # LogMath logMath,
"digits", # String grammarName,
false, # boolean showGrammar,
false, # boolean optimizeGrammar,
false, # boolean addSilenceWords,
false, # boolean addFillerWords,
dictionary # Dictionary dictionary
)
# Combines the acoustic model, the grammar and the unit manager; every
# optional feature flag is passed as false and no phone-loop model is given.
linguist = FlatLinguist(
model, # AcousticModel acousticModel,
logMath, # LogMath logMath,
grammar, # Grammar grammar,
unitManager, # UnitManager unitManager,
wordInsertionProbability, # double wordInsertionProbability,
1.0, # double silenceInsertionProbability,
1.0, # double fillerInsertionProbability,
1.0, # double unitInsertionProbability,
languageWeight, # float languageWeight,
false, # boolean dumpGStates,
false, # boolean showCompilationProgress,
false, # boolean spreadWordProbabilitiesAcrossPronunciations,
false, # boolean addOutOfGrammarBranch,
1.0, # double outOfGrammarBranchProbability,
1.0, # double phoneInsertionProbability,
None # AcousticModel phoneLoopAcousticModel
)
# init recognizer
scorer = ThreadedAcousticScorer(frontend, None, 10, true, 0)
pruner = SimplePruner()
activeListFactory = PartitionActiveListFactory(absoluteBeamWidth, relativeBeamWidth, logMath)
searchManager = SimpleBreadthFirstSearchManager(
    logMath, linguist, pruner,
    scorer, activeListFactory,
    false, 0.0, 0, false)
decoder = Decoder(searchManager,
                  false, false,
                  ArrayList(),
                  100000)
recognizer = Recognizer(decoder, None)

# Allocate the resources necessary for the recognizer.
# Restored as a statement: the call had been folded into the comment above.
recognizer.allocate()

# Loop until the last utterance in the audio file has been decoded, in which
# case the recognizer will return None.
result = recognizer.recognize()
while result is not None:
    resultText = result.getBestResultNoFiller()
    # Single-argument print, so the parentheses do not change the output.
    print(resultText)
    result = recognizer.recognize()
I have Jython installed already!
Please help me!
To load Java code, try adding jars to your classpath.
If you're not familiar with how to do that, either use
export CLASSPATH=$CLASSPATH:/path/to/sphinx4.jar:/path/jsapi.jar
Or add it to your command line script:
java -cp /path/to/jars -jar /home/karen/jython-installer-2.5.3/jython.jar PythonTranscriber.py
EDIT:
So, I followed the instructions here to download and install Sphinx-4. The path to the jar files was different than you had in your script. They were under the sphinx4/lib directory. You may want to double-check the correctness of your path.
I ran your script as below (modified, but no major changes):
import os
import sys

lib_dir = '/home/.../jars/sphinx4/lib/'
classpaths = ['sphinx4.jar', 'jsapi.jar']
# Put each jar on sys.path before the Java imports that follow.
for cp in classpaths:
    sys.path.append(os.path.join(lib_dir, cp))
from edu.cmu.sphinx.decoder import Decoder
from ... import ...
etc.
from java.util import ArrayList
with the command:
jython python_transcriber.py
This almost worked, except that you have one more import error:
from edu.cmu.sphinx.jsapi import JSGFGrammar
should, according to the javadoc, be:
from edu.cmu.sphinx.jsgf import JSGFGrammar
That took care of it for me. Didn't try the rest of the code, but at least all the imports work.

Categories

Resources