Emacs 构造过程

Table of Contents

Emacs 架构分两层:底层是用 C 语言实现的核心部分,上层是用 Emacs Lisp 编写的代码。

编译 Emacs 时,先将 C 语言实现的部分编译成二进制文件“temacs”,再启动“temacs”来加载 Emacs Lisp 写的核心代码,最后借助“temacs”将加载到内存的代码以及 temacs 本身写到二进制文件“emacs”中。

src/Makefile 里可看到 temacs 是如何产生的:

temacs$(EXEEXT): $(LIBXMENU) $(ALLOBJS) \
                 $(lib)/libgnu.a $(EMACSRES) ${charsets} ${charscript}
        $(AM_V_CCLD)$(CC) $(ALL_CFLAGS) $(TEMACS_LDFLAGS) $(LDFLAGS) \
          -o temacs $(ALLOBJS) $(lib)/libgnu.a $(W32_RES_LINK) $(LIBES)
        $(MKDIR_P) $(etc)
说明:

$(EXEEXT) 用于定义扩展名,可以忽略它。

其他都是一些编译选项,最重要的是 ALLOBJS,它定义了要链接进 temacs 的目标文件:

ALLOBJS = $(FIRSTFILE_OBJ) $(VMLIMIT_OBJ) $(obj) $(otherobj)

### 依赖关系如下:
# NS_OBJC_OBJ 可忽略
obj = $(base_obj) $(NS_OBJC_OBJ)

base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
        charset.o coding.o category.o ccl.o character.o chartab.o bidi.o \
        $(CM_OBJ) term.o terminal.o xfaces.o $(XOBJ) $(GTK_OBJ) $(DBUS_OBJ) \
        emacs.o keyboard.o macros.o keymap.o sysdep.o \
        buffer.o filelock.o insdel.o marker.o \
        minibuf.o fileio.o dired.o \
        cmds.o casetab.o casefiddle.o indent.o search.o regex.o undo.o \
        alloc.o data.o doc.o editfns.o callint.o \
        eval.o floatfns.o fns.o font.o print.o lread.o $(MODULES_OBJ) \
        syntax.o $(UNEXEC_OBJ) bytecode.o \
        process.o gnutls.o callproc.o \
        region-cache.o sound.o atimer.o \
        doprnt.o intervals.o textprop.o composite.o xml.o $(NOTIFY_OBJ) \
        $(XWIDGETS_OBJ) \
        profiler.o decompress.o \
        $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
        $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ)

现在,来看 emacs 是如何编译的:

emacs$(EXEEXT): temacs$(EXEEXT) \
                lisp.mk $(etc)/DOC $(lisp) $(leimdir)/leim-list.el \
                $(lispsource)/international/charprop.el ${charsets}
ifeq ($(CANNOT_DUMP),yes)
        ln -f temacs$(EXEEXT) $@
else
        LC_ALL=C $(RUN_TEMACS) -batch -l loadup dump
        $(PAXCTL_if_present) -zex $@
        ln -f $@ bootstrap-emacs$(EXEEXT)
endif

最重要的是这句:

$(RUN_TEMACS) -batch -l loadup dump

先启动 temacs,再加载 lisp/loadup.el,loadup.el 加载了核心的 Lisp 代码之后调用 dump-emacs 函数将当前内存中的所有 Emacs Lisp 对象以及 temacs 本身写入到了新的二进制文件中,这个二进制文件就是 emacs:

(if (member (car (last command-line-args)) '("dump" "bootstrap"))
    (progn
      (message "Dumping under the name emacs")
      (condition-case ()
          (delete-file "emacs")
        (file-error nil))
      ;; We used to dump under the name xemacs, but that occasionally
      ;; confused people installing Emacs (they'd install the file
      ;; under the name `xemacs'), and it's inconsistent with every
      ;; other GNU program's build process.
      (dump-emacs "emacs" "temacs")
      (message "%d pure bytes used" pure-bytes-used)
      ;; Recompute NAME now, so that it isn't set when we dump.
      (if (not (or (eq system-type 'ms-dos)
                   ;; Don't bother adding another name if we're just
                   ;; building bootstrap-emacs.
                   (equal (last command-line-args) '("bootstrap"))))
          (let ((name (concat "emacs-" emacs-version))
                (exe (if (eq system-type 'windows-nt) ".exe" "")))
            (while (string-match "[^-+_.a-zA-Z0-9]+" name)
              (setq name (concat (downcase (substring name 0 (match-beginning 0)))
                                 "-"
                                 (substring name (match-end 0)))))
            (setq name (concat name exe))
            (message "Adding name %s" name)
            ;; When this runs on Windows, invocation-directory is not
            ;; necessarily the current directory.
            (add-name-to-file (expand-file-name (concat "emacs" exe)
                                                invocation-directory)
                              (expand-file-name name invocation-directory)
                              t)))
      (kill-emacs)))

dump-emacs 函数是用 C 语言实现的,定义在 src/emacs.c 中:

DEFUN ("dump-emacs", Fdump_emacs, Sdump_emacs, 2, 2, 0,
       doc: /* Dump current state of Emacs into executable file FILENAME.
Take symbols from SYMFILE (presumably the file you executed to run Emacs).
This is used in the file `loadup.el' when building Emacs.

You must run Emacs in batch mode in order to dump it.  */)
  (Lisp_Object filename, Lisp_Object symfile)
{
  ...省略...

  alloc_unexec_pre ();

  unexec (SSDATA (filename), !NILP (symfile) ? SSDATA (symfile) : 0);

  alloc_unexec_post ();

  ...省略...
  return unbind_to (count, Qnil);
}

关键函数是 unexec,unexec 负责将内存中的内容导出并生成二进制文件。

Emacs 启动那么快是因为事先就将 Lisp 代码写入到了二进制文件中,运行时就一同加载到了内存中,而不是启动时才逐个文件加载的。temacs 在 C 和 Lisp 之间起到很好的桥梁作用。

1. 核心函数定义

Emacs Lisp 中一些核心函数由 C 语言实现。定义函数的 C 语言宏是 DEFUN,定义在 src/lisp.h 里:

#ifdef _MSC_VER
#define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)     \
  Lisp_Object fnname DEFUN_ARGS_ ## maxargs ;                           \
  static struct Lisp_Subr alignas (GCALIGNMENT) sname =                 \
    { { (PVEC_SUBR << PSEUDOVECTOR_AREA_BITS)                           \
        | (sizeof (struct Lisp_Subr) / sizeof (EMACS_INT)) },           \
      { (Lisp_Object (__cdecl *)(void))fnname },                        \
      minargs, maxargs, lname, intspec, 0};                             \
  Lisp_Object fnname
#else  /* not _MSC_VER */
#define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)     \
  static struct Lisp_Subr alignas (GCALIGNMENT) sname =                 \
    { { PVEC_SUBR << PSEUDOVECTOR_AREA_BITS },                          \
      { .a ## maxargs = fnname },                                       \
      minargs, maxargs, lname, intspec, 0};                             \
  Lisp_Object fnname
#endif

在 src 目录下 grep 一下 DEFUN,看看有多少函数是用 C 实现的(下面只列出 eval.c 中的结果):

eval.c:332:DEFUN ("or", For, Sor, 0, UNEVALLED, 0,
eval.c:356:DEFUN ("and", Fand, Sand, 0, UNEVALLED, 0,
eval.c:380:DEFUN ("if", Fif, Sif, 2, UNEVALLED, 0,
eval.c:400:DEFUN ("cond", Fcond, Scond, 0, UNEVALLED, 0,
eval.c:433:DEFUN ("progn", Fprogn, Sprogn, 0, UNEVALLED, 0,
eval.c:462:DEFUN ("prog1", Fprog1, Sprog1, 1, UNEVALLED, 0,
eval.c:485:DEFUN ("prog2", Fprog2, Sprog2, 2, UNEVALLED, 0,
eval.c:500:DEFUN ("setq", Fsetq, Ssetq, 0, UNEVALLED, 0,
eval.c:545:DEFUN ("quote", Fquote, Squote, 1, UNEVALLED, 0,
eval.c:563:DEFUN ("function", Ffunction, Sfunction, 1, UNEVALLED, 0,
eval.c:601:DEFUN ("defvaralias", Fdefvaralias, Sdefvaralias, 2, 3, 0,
eval.c:678:DEFUN ("default-toplevel-value", Fdefault_toplevel_value, Sdefault_toplevel_value, 1, 1, 0,
eval.c:691:DEFUN ("set-default-toplevel-value", Fset_default_toplevel_value,
eval.c:705:DEFUN ("defvar", Fdefvar, Sdefvar, 1, UNEVALLED, 0,
eval.c:785:DEFUN ("defconst", Fdefconst, Sdefconst, 2, UNEVALLED, 0,
eval.c:827:DEFUN ("internal-make-var-non-special", Fmake_var_non_special,
eval.c:838:DEFUN ("let*", FletX, SletX, 1, UNEVALLED, 0,
eval.c:900:DEFUN ("let", Flet, Slet, 1, UNEVALLED, 0,
eval.c:971:DEFUN ("while", Fwhile, Swhile, 1, UNEVALLED, 0,
eval.c:995:DEFUN ("macroexpand", Fmacroexpand, Smacroexpand, 1, 2, 0,
eval.c:1066:DEFUN ("catch", Fcatch, Scatch, 1, UNEVALLED, 0,
eval.c:1176:DEFUN ("throw", Fthrow, Sthrow, 2, 2, 0,
eval.c:1194:DEFUN ("unwind-protect", Funwind_protect, Sunwind_protect, 1, UNEVALLED, 0,
eval.c:1210:DEFUN ("condition-case", Fcondition_case, Scondition_case, 2, UNEVALLED, 0,
eval.c:1453:DEFUN ("signal", Fsignal, Ssignal, 2, 2, 0,
eval.c:1789:DEFUN ("commandp", Fcommandp, Scommandp, 1, 2, 0,
eval.c:1827:  /* Emacs primitives are interactive if their DEFUN specifies an
eval.c:1858:DEFUN ("autoload", Fautoload, Sautoload, 2, 5, 0,
eval.c:1918:DEFUN ("autoload-do-load", Fautoload_do_load, Sautoload_do_load, 1, 3, 0,
eval.c:1986:DEFUN ("eval", Feval, Seval, 1, 2, 0,
eval.c:2270:DEFUN ("apply", Fapply, Sapply, 1, MANY, 0,
eval.c:2352:DEFUN ("run-hooks", Frun_hooks, Srun_hooks, 0, MANY, 0,
eval.c:2377:DEFUN ("run-hook-with-args", Frun_hook_with_args,
eval.c:2396:DEFUN ("run-hook-with-args-until-success", Frun_hook_with_args_until_success,
eval.c:2420:DEFUN ("run-hook-with-args-until-failure", Frun_hook_with_args_until_failure,
eval.c:2450:DEFUN ("run-hook-wrapped", Frun_hook_wrapped, Srun_hook_wrapped, 2, MANY, 0,
eval.c:2630:DEFUN ("functionp", Ffunctionp, Sfunctionp, 1, 1, 0,
eval.c:2639:DEFUN ("funcall", Ffuncall, Sfuncall, 1, MANY, 0,
eval.c:2960:DEFUN ("fetch-bytecode", Ffetch_bytecode, Sfetch_bytecode,
eval.c:3264:DEFUN ("special-variable-p", Fspecial_variable_p, Sspecial_variable_p, 1, 1, 0,
eval.c:3275:DEFUN ("backtrace-debug", Fbacktrace_debug, Sbacktrace_debug, 2, 2, 0,
eval.c:3294:DEFUN ("backtrace", Fbacktrace, Sbacktrace, 0, 0, "",
eval.c:3360:DEFUN ("backtrace-frame", Fbacktrace_frame, Sbacktrace_frame, 1, 2, NULL,
eval.c:3481:DEFUN ("backtrace-eval", Fbacktrace_eval, Sbacktrace_eval, 2, 3, NULL,
eval.c:3503:DEFUN ("backtrace--locals", Fbacktrace__locals, Sbacktrace__locals, 1, 2, NULL,

eval.c 实现了 Lisp 最基本的一批函数和特殊形式(special form,即上面 maxargs 为 UNEVALLED 的那些),例如 if、let、while、setq 等。