Getting an error while running PySpark

I am trying to create a SparkSession as shown below:

spark = SparkSession.builder \
    .appName("PySpark-Get-Started") \
    .getOrCreate()

While executing this, I am getting a FileNotFoundError. Please let me know what is missing.


FileNotFoundError Traceback (most recent call last)
Cell In[12], line 3
1 spark = SparkSession.builder
2 .appName(“PySpark-Get-Started”)
----> 3 .getOrCreate()

File ~.pyspark-env\Lib\site-packages\pyspark\sql\session.py:497, in SparkSession.Builder.getOrCreate(self)
495 sparkConf.set(key, value)
496 # This SparkContext may be an existing one.
→ 497 sc = SparkContext.getOrCreate(sparkConf)
498 # Do not update SparkConf for existing SparkContext, as it’s shared
499 # by all sessions.
500 session = SparkSession(sc, options=self._options)

File ~.pyspark-env\Lib\site-packages\pyspark\context.py:515, in SparkContext.getOrCreate(cls, conf)
513 with SparkContext._lock:
514 if SparkContext._active_spark_context is None:
→ 515 SparkContext(conf=conf or SparkConf())
516 assert SparkContext._active_spark_context is not None
517 return SparkContext._active_spark_context

File ~.pyspark-env\Lib\site-packages\pyspark\context.py:201, in SparkContext.init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls, udf_profiler_cls, memory_profiler_cls)
195 if gateway is not None and gateway.gateway_parameters.auth_token is None:
196 raise ValueError(
197 “You are trying to pass an insecure Py4j gateway to Spark. This”
198 " is not allowed as it is a security risk."
199 )
→ 201 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
202 try:
203 self._do_init(
204 master,
205 appName,
(…)
215 memory_profiler_cls,
216 )

File ~.pyspark-env\Lib\site-packages\pyspark\context.py:436, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
434 with SparkContext._lock:
435 if not SparkContext._gateway:
→ 436 SparkContext._gateway = gateway or launch_gateway(conf)
437 SparkContext._jvm = SparkContext._gateway.jvm
439 if instance:

File ~.pyspark-env\Lib\site-packages\pyspark\java_gateway.py:100, in launch_gateway(conf, popen_kwargs)
97 proc = Popen(command, **popen_kwargs)
98 else:
99 # preexec_fn not supported on Windows
→ 100 proc = Popen(command, **popen_kwargs)
102 # Wait for the file to appear, or for the process to exit, whichever happens first.
103 while not proc.poll() and not os.path.isfile(conn_info_file):

File ~\AppData\Local\Programs\Python\Python312\Lib\subprocess.py:1026, in Popen.init(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)
1022 if self.text_mode:
1023 self.stderr = io.TextIOWrapper(self.stderr,
1024 encoding=encoding, errors=errors)
→ 1026 self._execute_child(args, executable, preexec_fn, close_fds,
1027 pass_fds, cwd, env,
1028 startupinfo, creationflags, shell,
1029 p2cread, p2cwrite,
1030 c2pread, c2pwrite,
1031 errread, errwrite,
1032 restore_signals,
1033 gid, gids, uid, umask,
1034 start_new_session, process_group)
1035 except:
1036 # Cleanup if the child failed starting.
1037 for f in filter(None, (self.stdin, self.stdout, self.stderr)):

File ~\AppData\Local\Programs\Python\Python312\Lib\subprocess.py:1538, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)
1536 # Start the process
1537 try:
→ 1538 hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
1539 # no special security
1540 None, None,
1541 int(not close_fds),
1542 creationflags,
1543 env,
1544 cwd,
1545 startupinfo)
1546 finally:
1547 # Child is launched. Close the parent’s copy of those pipe
1548 # handles that only the child should have open. You need
(…)
1551 # pipe will not close when the child process exits and the
1552 # ReadFile will hang.
1553 self._close_pipe_fds(p2cread, p2cwrite,
1554 c2pread, c2pwrite,
1555 errread, errwrite)

FileNotFoundError: [WinError 2] The system cannot find the file specified

Please read this thread about how to post code and tracebacks: About the Python Help category
You should be able to edit your post to fix the formatting.

It appears that you are trying to run an external program.
Once you fix the formatting, it will be easier to read your info.
It seems that the program PySpark tried to launch does not exist on your system.

1 Like

I guess that you are using the pyspark third-party library to try to use Spark from within Python. Spark is a separate Java program: you have to install it separately (along with a Java runtime) and also make sure that PySpark knows where the Spark installation is — typically via the SPARK_HOME and JAVA_HOME environment variables — in order to be able to launch it. The documentation should give more information. Another useful way to find information is with a search engine.