（2）开发LLVM项目 · 编译器架构的王者LLVM

LLVM平台，短短几年间，改变了众多编程语言的走向，也催生了一大批具有特色的编程语言的出现，不愧为编译器架构的王者，也荣获2012年ACM软件系统奖 —— 题记版权声明：本文为西风逍遥游原创文章，转载请注明出处西风世界 [http://blog.csdn.net/xfxyy_sxfancy](http://blog.csdn.net/xfxyy_sxfancy) ### 开发LLVM项目介绍了LLVM这么多，那么我们能用LLVM做一款自己的编程语言么？答案是，有点难度，但不是不可行。只要你熟悉C++编程，而且有足够的热情，那么就没有什么能阻止你了。下面我就来介绍一下，LLVM项目的基本方法。需要的东西： LLVM平台库，文档，CMAKE，C++编译器 ### 环境搭建首先我的系统是Ubuntu14.04，我就介绍Ubuntu下的配置方法了，用Windows的朋友就不好意思了。安装llvm-3.6及其开发包： ~~~ sudo apt-get install llvm-3.6* ~~~ 一般是推荐将文档和示例都下载下来的，因为比较这些对应版本的参考很重要，很多网上的代码，都是特定版本有效，后来就有API变更的情况。所以大家一定注意版本问题，我开发的时候，源里面的版本最高就3.6，我也不追求什么最新版本，新特性什么的，所以声明一下，本系列教程的LLVM版本均为3.6版，文档参考也为3.6版。 ~~~ sudo apt-get install clang cmake ~~~ clang编译器，我个人感觉比gcc好用许多倍，而且这个编译器就是用llvm作为后端，能够帮助我们编译一些C代码到LLVM中间码，方便我们有个正确的中间码参考。 ### CMAKE管理项目 CMake作为C++项目管理的利器，也是非常好用的一个工具，这样我们就不用自己很烦的写Makefile了，下面是一个CMake示例，同时还带有FLex和Bison的配置： ~~~ cmake_minimum_required(VERSION 2.8) project(RedApple) set(LLVM_TARGETS_TO_BUILD X86) set(LLVM_BUILD_RUNTIME OFF) set(LLVM_BUILD_TOOLS OFF) find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") find_package(BISON) find_package(FLEX) SET (CMAKE_CXX_COMPILER_ENV_VAR "clang++") SET (CMAKE_CXX_FLAGS "-std=c++11") SET (CMAKE_CXX_FLAGS_DEBUG "-g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) FLEX_TARGET(MyScanner ${CMAKE_CURRENT_SOURCE_DIR}/src/redapple_lex.l ${CMAKE_CURRENT_BINARY_DIR}/redapple_lex.cpp COMPILE_FLAGS -w) BISON_TARGET(MyParser ${CMAKE_CURRENT_SOURCE_DIR}/src/redapple_parser.y ${CMAKE_CURRENT_BINARY_DIR}/redapple_parser.cpp) ADD_FLEX_BISON_DEPENDENCY(MyScanner MyParser) include_directories(Debug Release build include src src/Model src/Utils) file(GLOB_RECURSE source_files ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Model/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Macro/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Utils/*.cpp) add_executable(redapple ${source_files} ${BISON_MyParser_OUTPUTS} ${FLEX_MyScanner_OUTPUTS}) install(TARGETS redapple RUNTIME DESTINATION bin) # Find the libraries that correspond to the LLVM components # that we wish to use llvm_map_components_to_libnames(llvm_libs support core irreader executionengine interpreter mc mcjit bitwriter x86codegen target) # Link against LLVM libraries target_link_libraries(redapple ${llvm_libs}) ~~~ Ubuntu的默认安装，有时LLVM会出bug，cmake找不到许多配置文件，我仔细查看了它的CMake配置，发现有一行脚本路径写错了： /usr/share/llvm-3.6/cmake/ 是llvm的cmake配置路径其中的LLVMConfig.cmake第48行，它原来的路径是这样的： ~~~ set(LLVM_CMAKE_DIR "/usr/share/llvm-3.6/share/llvm/cmake") ~~~ 应该改成： ~~~ set(LLVM_CMAKE_DIR "/usr/share/llvm-3.6/cmake") ~~~ Ubuntu下的llvm文档和示例都在如下目录： /usr/share/doc/llvm-3.6-doc /usr/share/doc/llvm-3.6-examples 我们将example下的HowToUseJIT复制到工作目录中，测试编译一下，懒得找的可以粘我后面附录给的内容。然后再用简单修改后的CMake测试编译一下。项目结构是这样的： ~~~ HowToUseJIT -- src + --- HowToUseJIT.cpp + --- CMakeLists.txt + --- build ~~~ 在项目根目录执行如下指令： ~~~ cd build cmake .. make ~~~ 如果编译通过了，那么恭喜你，你已经会构建LLVM项目了 ### 附： CMakeLists.txt 和 HowToUseJIT.cpp CMakeLists.txt ~~~ cmake_minimum_required(VERSION 2.8) project(llvm_test) set(LLVM_TARGETS_TO_BUILD X86) set(LLVM_BUILD_RUNTIME OFF) set(LLVM_BUILD_TOOLS OFF) find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") SET (CMAKE_CXX_COMPILER_ENV_VAR "clang++") SET (CMAKE_CXX_FLAGS "-std=c++11") SET (CMAKE_CXX_FLAGS_DEBUG "-g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) file(GLOB_RECURSE source_files "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp") add_executable(llvm_test ${source_files}) install(TARGETS llvm_test RUNTIME DESTINATION bin) # Find the libraries that correspond to the LLVM components # that we wish to use llvm_map_components_to_libnames(llvm_libs Core ExecutionEngine Interpreter MC Support nativecodegen) # Link against LLVM libraries target_link_libraries(llvm_test ${llvm_libs}) ~~~ HowToUseJIT.cpp ~~~ //===-- examples/HowToUseJIT/HowToUseJIT.cpp - An example use of the JIT --===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This small program provides an example of how to quickly build a small // module with two functions and execute it with the JIT. // // Goal: // The goal of this snippet is to create in the memory // the LLVM module consisting of two functions as follow: // // int add1(int x) { // return x+1; // } // // int foo() { // return add1(10); // } // // then compile the module via JIT, then execute the `foo' // function and return result to a driver, i.e. to a "host program". // // Some remarks and questions: // // - could we invoke some code using noname functions too? // e.g. evaluate "foo()+foo()" without fears to introduce // conflict of temporary function name with some real // existing function name? // //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; int main() { InitializeNativeTarget(); LLVMContext Context; // Create some module to put our function into it. std::unique_ptr<Module> Owner = make_unique<Module>("test", Context); Module *M = Owner.get(); // Create the add1 function entry and insert this entry into module M. The // function will have a return type of "int" and take an argument of "int". // The '0' terminates the list of argument types. Function *Add1F = cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), Type::getInt32Ty(Context), (Type *)0)); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F); // Create a basic block builder with default parameters. The builder will // automatically append instructions to the basic block `BB'. IRBuilder<> builder(BB); // Get pointers to the constant `1'. Value *One = builder.getInt32(1); // Get pointers to the integer argument of the add1 function... assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg Argument *ArgX = Add1F->arg_begin(); // Get the arg ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the add instruction, inserting it into the end of BB. Value *Add = builder.CreateAdd(One, ArgX); // Create the return instruction and add it to the basic block builder.CreateRet(Add); // Now, function add1 is ready. // Now we're going to create function `foo', which returns an int and takes no // arguments. Function *FooF = cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), (Type *)0)); // Add a basic block to the FooF function. BB = BasicBlock::Create(Context, "EntryBlock", FooF); // Tell the basic block builder to attach itself to the new basic block builder.SetInsertPoint(BB); // Get pointer to the constant `10'. Value *Ten = builder.getInt32(10); // Pass Ten to the call to Add1F CallInst *Add1CallRes = builder.CreateCall(Add1F, Ten); Add1CallRes->setTailCall(true); // Create the return instruction and add it to the basic block. builder.CreateRet(Add1CallRes); // Now we create the JIT. ExecutionEngine* EE = EngineBuilder(std::move(Owner)).create(); outs() << "We just constructed this LLVM module:\n\n" << *M; outs() << "\n\nRunning foo: "; outs().flush(); // Call the `foo' function with no arguments: std::vector<GenericValue> noargs; GenericValue gv = EE->runFunction(FooF, noargs); // Import result of execution: outs() << "Result: " << gv.IntVal << "\n"; delete EE; llvm_shutdown(); return 0; } ~~~