From 333b605b2afd472b823aeda0adf0e8b1ea9843c0 Mon Sep 17 00:00:00 2001 From: fishsoupisgood Date: Mon, 27 May 2019 02:41:51 +0100 Subject: initial commit from asl-1.41r8.tar.gz --- doc_EN/Makefile | 36 + doc_EN/as.tex | 11521 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 11557 insertions(+) create mode 100644 doc_EN/Makefile create mode 100644 doc_EN/as.tex (limited to 'doc_EN') diff --git a/doc_EN/Makefile b/doc_EN/Makefile new file mode 100644 index 0000000..fdedff7 --- /dev/null +++ b/doc_EN/Makefile @@ -0,0 +1,36 @@ +INCFILES = ../doc_DE/taborg*.tex ../doc_DE/pscpu.tex ../doc_DE/pscomm.tex + +all: as.doc as.html as.ps + +as.ps: as.dvi + dvips as -o + +as.dvi: as.tex $(INCFILES) + latex as + latex as + makeindex as + latex as + +as.doc: as.tex $(INCFILES) $(TEX2DOC) + $(TEX2DOC) as.tex as.doc + $(TEX2DOC) as.tex as.doc + +as.html: as.tex $(INCFILES) $(TEX2HTML) + $(TEX2HTML) as.tex as.html + $(TEX2HTML) as.tex as.html + +clean: + $(RM) as.dvi + $(RM) as.ps + $(RM) as.i* + $(RM) *log + $(RM) as.aux + $(RM) as.toc + $(RM) *~ + $(RM) DEADJOE + $(RM) as.doc + $(RM) as.dtoc + $(RM) as.daux + $(RM) as.html + $(RM) as.htoc + $(RM) as.haux diff --git a/doc_EN/as.tex b/doc_EN/as.tex new file mode 100644 index 0000000..d7215de --- /dev/null +++ b/doc_EN/as.tex @@ -0,0 +1,11521 @@ +%% Hello altogether, +%% +%% this is a release of the english AS manual. I haven't +%% done the entire translation myself, large parts of it are the work of some +%% other people around the net who deserve my deep appreciation for this job. +%% My parts of the translation are the results of a brute-force attempt, +%% so there are surely tons of spelling errors and passages that will +%% make people with english as their mother tongue either laugh or cry... 
+ +%% Alfred Arnold + +%% translation by: Oliver Sellke (OSIP, D-65199 Wiesbaden) +%% (proof-read in parts by Stefan Hilse, Wiesbaden) +%% Alfred Arnold +%% Stephan Kanthak +%% Vittorio De Tomasi +%% +%% thanks to the authors of: +%% FB-translator +%% GNU-ispell +%% +%% ------------------------------------------------------------------------------ + +%%TITLE User's Manual for Macro Assembler AS +\documentstyle[12pt,twoside,makeidx]{report} +\pagestyle{headings} +\sloppy +%%\textwidth 15cm +%%\evensidemargin 0.5cm +%%\oddsidemargin 0.5cm +\topsep 1mm +\parskip 0.3cm plus0.25cm minus0.25cm +\parindent 0cm + +\newcommand{\ii}[1]{{\it #1}} +\newcommand{\bb}[1]{{\bf #1}} +\newcommand{\tty}[1]{{\tt #1}} +\newcommand{\tin}[1]{{\scriptsize #1}} +\newcommand{\ttindex}[1]{\index{#1@{\tt #1}}} + +\font\mengft=cmss9 scaled \magstep1 +\def \rz{\hbox{\mengft{I \hskip -1.7mm R}}} + +\makeindex + +%%=========================================================================== + +\begin{document} + +\thispagestyle{empty} + +\ +\vspace{7cm}\par + +\begin{raggedright} +{\large Alfred Arnold, Stefan Hilse, Stephan Kanthak, Oliver + Sellke, Vittorio De Tomasi} +\vspace{1cm}\par +{\huge Macro Assembler AS V1.41r8}\\ +\rule{9.5cm}{0.3mm}\\ +\vspace{2mm}\par +{\huge User's Manual} + +\vspace{1cm}\par + +{\large Edition November 1999} +\end{raggedright} + +\clearpage +\thispagestyle{empty} + +\ \vspace{5cm} + +{\em IBM, PPC403Gx, OS/2, and PowerPC} are registered trademarks of IBM +Corporation. + +{\em Intel, MCS-48, MCS-51, MCS-251, MCS-96, MCS-196 and MCS-296} are +registered trademarks of Intel Corp. . + +{\em Motorola and ColdFire} are registered trademarks of Motorola Inc. . + +{\em UNIX} is a registered trademark of X/Open Company. + +{\em Linux} is a registered trademark of Linus Torvalds. + +{\em Microsoft, Windows, and MS-DOS} are registered trademarks of +Microsoft Corporation. 
+ +All other trademarks not explicitly mentioned in this section and used in +this manual are properties of their respective owners. + +\vspace{7cm} + +This document has been processed with the LaTeX typesetting system using +Digital Unix, Linux, and OS/2 operating systems running on AMD K6 and +DEC Alpha processors. + +\clearpage + +%%=========================================================================== + +{\parskip 0cm plus0.1cm \tableofcontents} + +%%=========================================================================== + +\cleardoublepage +\chapter{Introduction} + +This instruction is meant for those people who are already very familiar +with Assembler and who like to know how to work with AS. It is rather a +reference than a user's manual and so it neither tries to explain the +''language assembler'' nor the processors. I have listed further +literature in the bibliography which was substantial in the implementation +of the different code generators. There is no book I know where you can +learn Assembler from the start, so I generally learned this by ''trial and +error''. + +%%--------------------------------------------------------------------------- + +\section{License Agreement} +\label{SectLicense} + +Before we can go ''in medias res'', first of all the inevitable prologue: + +I publish AS, in the present version, as ''Public Domain''. +This means, the program and overlay files and also the +utility and tool programs appended may be copied and use for free (of +charge). There are no plans to convert AS into a commercial or +shareware program. This permission however is valid only under the +following premises: +\begin{enumerate} +\item{The start message of the programs - especially the copyright + message - must not be removed or overwritten.} +\item{The compensation charged for copying and shipping must not + exceed D(E)M 20,-- (around US\$ 14.00).} +\end{enumerate} + +On request the source code of this program can also be made available. 
+Programs or derivates structured hereon must be passed-on under the +same conditions as this program. + +I explicitly encourage you to spread this program by disc or +mailbox, BBS, resp. Internet! + +May be, you have got this program as enclosure to a commercial +program. The license agreement for the commercial program in no case +applies to AS. + +If you took so much pleasure in this assembler that you like to +send me some money, I would ask you kindly to give the amount +to Greenpeace. + +I have been trying to make the programs as bug free as possible. +But since there is principally no bug free software (the only people +making no mistakes are lying in the cemetery!), I do not take any +warranty for the function of the assembler in a particular environment +(hard or software) or liability for damages. Naturally I will always +be thankful for bug-reports or improvements and will work on the fixing. + +To accelerate the error diagnose and correction, please add the +following details to the bug report: +\begin{itemize} +\item{hardware: \begin{itemize} + \item{processor type (with/without coprocessor)} + \item{amount of memory installed} + \item{video card} + \item{hard-disk type(s) and their interface(s)} + \end{itemize}} +\item{software: \begin{itemize} + \item{operating system (MS-DOS, Novell-DOS, DR-DOS, OS/2, + Windows) and version} + \item{resident (TSR) programs installed} + \item{version of AS including dates of the \tty{EXE}-files} + \end{itemize}} +\item{if possible, the source file, in which the bug occurs} +\end{itemize} +You can contact me as follows: +\begin{itemize} +\item{by Surface Mail: \begin{description} + \item{Alfred Arnold} + \item{Hirschgraben 29} + \item{D-52062 Aachen} + \item{Germany} + \end{description}} +\item{by E-Mail: \tty{alfred@ccac.rwth-aachen.de}} +\end{itemize} +If someone likes to meet me personally to ask questions and lives +near Aachen (= Aix-la-Chapelle), you will be able to meet me there. 
+You can do this most probably on Thursdays from 7pm to 9pm at the +computer club inside the RWTH Aachen (Eilfschornsteinstrasse 16, +cellar of philosophers' building, backdoor entry). + +Please don't call me by phone. First, complex relations are +extremely hard to discuss over the phone. Secondly, the telephone +companies are already rich enough... + +The latest German version of AS (DOS,DPMI,OS/2) is available from +the following FTP-Server: +\begin{verbatim} + ftp.uni-stuttgart.de, directory + pub/systems/msdos/programming/as +\end{verbatim} +The sources of the C version can be fetched from the following server: +\begin{verbatim} + sunsite.unc.edu, directory + pub/Linux/devel/lang/assemblers/asl-<version>.tar.gz +\end{verbatim} +..and of course thereby from every Sunsite mirror of the world! + +Whoever has no access to an FTP-Server can ask me to send the assembler +by mail. Only requests containing floppies (2 pieces 1.44 Mbytes, for +720Kbytes/1.2Mbytes format 4/3 pieces) and a self-addressed, (correctly) +stamped envelope will be answered. Don't send any money! + +Now, after this inevitable introduction we can turn to the actual +documentation: + +%%--------------------------------------------------------------------------- + +\section{General Capabilities of the Assembler} + +In contrast to ordinary assemblers, AS offers the possibility to +generate code for totally different processors. At the moment, the +following processor families have been implemented: +\begin{itemize} +\item{Motorola 68000..68040,683xx incl. 
coprocessor and MMU} +\item{Motorola ColdFire} +\item{Motorola DSP5600x,DSP56300} +\item{Motorola M-Core} +\item{Motorola/IBM MPC601/MPC505/PPC403} +\item{Motorola 6800, 6805, 68HC08, 6809, 68(HC)11 68HC12, 68HC16, and Hitachi + 6301} +\item{Hitachi 6309} +\item{Hitachi H8/300(H)} +\item{Hitachi H8/500} +\item{Hitachi SH7000/7600/7700} +\item{Rockwell 6502 and 65(S)C02} +\item{CMD 65816} +\item{Mitsubishi MELPS-740} +\item{Mitsubishi MELPS-7700} +\item{Mitsubishi MELPS-4500} +\item{Mitsubishi M16} +\item{Mitsubishi M16C} +\item{Intel 4004} +\item{Intel MCS-48/41} +\item{Intel MCS-51/251} +\item{Intel MCS-96/196(Nx)/296} +\item{Intel 8080/8085} +\item{Intel i960} +\item{Signetics 8X30x} +\item{Philips XA} +\item{Atmel AVR} +\item{AMD 29K} +\item{Siemens 80C166/167} +\item{Zilog Z80, Z180, Z380} +\item{Zilog Z8} +\item{Toshiba TLCS-900(L)} +\item{Toshiba TLCS-90} +\item{Toshiba TLCS-870} +\item{Toshiba TLCS-47} +\item{Toshiba TLCS-9000} +\item{Microchip PIC16C54..16C57} +\item{Microchip PIC16C84/PIC16C64} +\item{Microchip PIC17C42} +\item{SGS-Thomson ST6} +\item{SGS-Thomson ST7} +\item{SGS-Thomson ST9} +\item{SGS-Thomson 6804} +\item{Texas Instruments TMS32010/32015} +\item{Texas Instruments TMS3202x} +\item{Texas Instruments TMS320C3x} +\item{Texas Instruments TMS320C20x/TMS320C5x} +\item{Texas Instruments TMS320C6x} +\item{Texas Instruments TMS9900} +\item{Texas Instruments TMS7000} +\item{Texas Instruments TMS370xxx} +\item{Texas Instruments MSP430} +\item{National Semiconductor SC/MP} +\item{National Semiconductor COP8} +\item{National Semiconductor SC144xx} +\item{Fairchild ACE} +\item{NEC $\mu$PD 78(C)1x} +\item{NEC $\mu$PD 75xxx (alias 75K0)} +\item{NEC $\mu$PD 78xxx (alias 78K0)} +\item{NEC $\mu$PD7720/7725} +\item{NEC $\mu$PD77230} +\item{Symbios Logic SYM53C8xx (yes, they are programmable!)} +\item{Fujitsu F$^2$MC8L} +\end{itemize} +under work / planned / in consideration : +\begin{itemize} +\item{Intel 4004/8008} +\item{Analog Devices ADSP21xx} 
+\item{SGS-Thomson ST20} +\item{Texas Instruments TMS320C4x} +\item{Texas Instruments TMS320C8x} +\item{Toshiba TC9331} +\end{itemize} +I'm currently searching for documentation about the following +families: +\begin{itemize} +\item{NEC 78K4} +\item{the complete set of OKI controllers} +\end{itemize} +unloved, but now, however, present : +\begin{itemize} +\item{Intel 80x86, 80186, NEC V30\&V35 incl. coprocessor 8087} +\end{itemize} +The switch to a different code generator is allowed even within one +file, and as often as one wants! + +The reason for this flexibility is that AS has a history, which may also +be recognized by looking at the version number. AS was created as an +extension of a macro assembler for the 68000 family. On special request, I +extended the original assembler so that it was able to translate 8051 +mnemonics. On this way (decline ?!) from the 68000 to 8051, some other +processors were created as by-products. All others were added over time +due to user requests. So at least for the processor-independent core of +AS, one may assume that it is well-tested and free of obvious bugs. +However, I often do not have the chance to test a new code generator in +practice (due to lack of appropriate hardware), so surprises are not +impossible when working with new features. You see, the things stated in +section \ref{SectLicense} have a reason... + +This flexibility implies a somewhat exotic code format, therefore I +added some tools to work with it. Their description can be found in +chapter \ref{ChapTools}. + +AS is a macro assembler, which means that the programmer has the +possibility to define new ''commands'' by means of macros. +Additionally it masters conditional assembling. Labels inside macros +are automatically processed as being local. + +For the assembler, symbols may have either integer, string or floating +point values. 
These will be stored - like interim values in formulas - +with a width of 32 bits for integer values, 80 or 64 bits for floating +point values, and 255 characters for strings. For a couple of micro +controllers, there is the possibility to classify symbols by segmentation. +So the assembler has a (limited) possibility to recognize accesses to +wrong address spaces. + +The assembler does not know explicit limits in the nesting depth of +include files or macros; a limit is only given by the program stack +restricting the recursion depth. Nor is there a limit for the +symbol length, which is only restricted by the maximum line length. + +From version 1.38 on, AS is a multipass-assembler. This pompous term +means no more than the fact that the number of passes through the +source code need not be exactly two. If the source code does not +contain any forward references, AS needs only one pass. In case AS +recognizes in the second pass that it must use a shorter or longer +instruction coding, it needs a third (fourth, fifth...) pass to +process all symbol references correctly. There is nothing more behind +the term ''multipass'', so it will not be used further more in this +documentation. + +After so much praise a bitter pill: AS cannot generate linkable code. +An extension with a linker needs considerable effort and is not planned +at the moment. + +As regards ''release of sources'': the sources of AS are not +presented in a form which allows easy understanding (== no +comments). So I will emit sources only in case somebody really wants +to work on it (e.g. to port AS into another computer system) and the +derivates become again Public Domain. Particularly I want to prevent +that someone changes 5 lines (most popular the copyright entry) and +sell the result commercially as ''his own'' program. 
+ +%%--------------------------------------------------------------------------- + +\section{Supported Platforms} + +Though AS started as a pure DOS \marginpar{{\em DOS}} program, there are a +couple of versions available that are able to exploit a bit more than the +Real Mode of an Intel CPU. Their usage is kept as compatible to the DOS +version as possible, but there are of course differences concerning +installation and embedding into the operating system in question. +Sections in this manual that are only valid for a specific version of AS +are marked with a corresponding sidemark (at this paragraph for the DOS +version) aheaded to the paragraph. In detail, the following further +versions exist (distributed as separate packages): + +In case you run \marginpar{{\em DPMI}}into memory problems when assembling +large and complex programs under DOS, there is a DOS version that runs in +protected mode via a DOS extender and can therefore make use of the whole +extended memory of an AT. The assembly becomes significantly slower by +the extender, but at least it works... + +There is a native OS/2 \marginpar{{\em OS/2}} version of AS for friends of +IBM's OS/2 operating system. This is currently only a 16-bit version, but +at least this way saves the roundtrips via DOS boxes and one does not have +any problems any more with longer file names. + +You can leave \marginpar{{\em UNIX}} the area of PCs-only with the C +version of AS that was designed to be compilable on a large number of UNIX +systems (this includes OS/2 with the emx compiler) without too much of +tweaking. In contrast to the previously mentioned versions, the C version +is delivered in source code, i.e. one has to create the binaries by +oneself using a C compiler. This is by far the simpler way (for me) than +providing a dozen of precompiled binaries for machines I sometimes only +have limited access to... 
+ +People who have read \marginpar{{\em ???}} this enumeration up to this +point will notice that the world's best-selling operating system coming from +Redmond is missing in this enumeration. People who know me personally +will know that I do not regard Windows to be a pat solution (regardless if +it is 3.X, 95, or NT). Frankly said, I am a 'windows hater'. A large +number of people will now regard this to be somewhere between obsolete and +ridiculous, and they will tell me that I withhold AS from a large part of +potential users, but they will have to live with it: I primarily continue +to improve AS because I have fun doing it; AS is a non-commercial project +and I therefore take the freedom not to look at potential market shares. I +select platforms for me where I have fun programming, and I definitely do +not have any fun when programming for Windows! By the way, there was a +time when I had to write Windows programs so I do not simply jabber +without having an idea what I am talking about. If someone wants to port +AS into this direction, I will not stand in his way, but (s)he should not +expect anything more from me than providing sources (which is why (s)he +will have to deal with questions like 'why does AS not work any more after +I changed the JUNK-CAD 18.53 registry entry from upper to lower case?'). + +%%=========================================================================== + +\cleardoublepage +\chapter{Assembler Usage} + +\begin{quote}\begin{raggedright}{\it +Scotty: Captain, we din\verb!'! can reference it! \\ +Kirk: Analysis, Mr. Spock? \\ +Spock: Captain, it doesn\verb!'!t appear in the symbol table. \\ +Kirk: Then it\verb!'!s of external origin? \\ +Spock: Affirmative. \\ +Kirk: Mr. Sulu, go to pass two. \\ +Sulu: Aye aye, sir, going to pass two. 
\\ +}\end{raggedright}\end{quote} + +%%--------------------------------------------------------------------------- + +\section{Hardware Requirements} + +The hardware requirements of AS vary substantially from version to +version: + +The DOS version \marginpar{{\em DOS}} will principally run on any +IBM-compatible PC, ranging from a PC/XT with 4-dot-little megahertz up to +a Pentium. However, similar to other programs, the fun using AS increases +the better your hardware is. An XT user without a hard drive will +probably have significant trouble placing the overlay file on a floppy +because it is larger than 500 Kbytes...the PC should therefore have at +least a hard drive, allowing acceptable loading times. AS is not very +advanced in its main memory needs: the program itself allocates less than +300 Kbytes main memory, AS should therefore work on machines with at least +512 Kbytes of memory. + +The version of AS \marginpar{{\em DPMI}} compiled for the DOS Protected +Mode Interface (DPMI) requires at least 1 Mbyte of free extended memory. +A total memory capacity of at least 2 Mbytes is therefore the absolute +minimum given one does not have other tools in the XMS (like disk caches, +RAM disks, or a hi-loaded DOS); the needs will rise then appropriately. +If one uses the DPMI version in a DOS box of OS/2, one has to assure that +DPMI has been enabled via the box's DOS settings (set to \tty{on} or +\tty{auto}) and that a sufficient amount of XMS memory has been assigned +to the box. The virtual memory management of OS/2 will free you +from thinking about the amount of free real memory. + +The hardware requirements of the OS/2 \marginpar{{\em OS/2}} version +mainly result from the needs of the underlying operating system, i.e. at +minimum an 80386SX processor, 8 Mbytes of RAM (resp. 4 Mbytes without the +graphical user interface) and 100..150 Mbytes of hard disk space. 
AS2 is +only a 16-bit application and therefore it should also work on older OS/2 +versions (thereby reducing the processor needs to at least an 80286 +processor); I had however no chance to test this. + +The C version of AS \marginpar{{\em UNIX}} is delivered as source code and +therefore requires a UNIX or OS/2 system equipped with a C compiler. The +compiler has to fulfill the ANSI standard (GNU-C for example is +ANSI-compliant). You can look up in the \tty{README} file whether your +UNIX system has already been tested so that the necessary definitions have +been made. You should reserve about 15 Mbytes of free hard disk space for +compilation; this value (and the amount needed after compilation to store +the compiled programs) strongly differs from system to system, so you +should take this value only as a rough approximation. + +%%--------------------------------------------------------------------------- + +\section{Delivery} + +Principally, you can obtain AS in one of two forms: as a {\em binary} or a +{\em source} distribution. In case of a binary distribution, one gets AS, +the accompanying tools and auxiliary files readily compiled, so you can +immediately start to use it after unpacking the archive to the desired +destination on your hard drive. +Binary distributions are made for widespread platforms, where either the +majority of users does not have a compiler or the compilation is tricky +(currently, this includes DOS and OS/2). A source distribution in +contrast contains the complete set of C sources to generate AS; it is +ultimately a snapshot of the source tree I use for development on AS. 
The +generation of AS from the sources and their structure is described in +detail in appendix \ref{ChapSource}, which is why at this place, only the +contents and installation of a binary distribution will be described: + +The contents of the archive is separated into several subdirectories, +therefore you get a directory subtree immediately after unpacking without +having to sort out things manually. The individual directories contain +the following groups of files: +\begin{itemize} +\item{{\tt BIN}: executable programs, text resources;} +\item{{\tt INCLUDE}: include files for assembler programs, e.g. register + definitions or standard macros;} +\item{{\tt MAN}: quick references for the individual programs in Unix + 'man' format.} +\end{itemize} +A list of the files found in every binary distribution is given in tables +\ref{TabCommonPackageList1} to \ref{TabCommonPackageList3}. In case a +file listed in one of these (or the following) tables is missing, someone +took a nap during copying (probably me)... 
+ +\begin{table*}[htp] +\begin{center}\begin{tabular}{|l|l|} +\hline +File & function \\ +\hline +\hline +{\bf Directory BIN} & \\ +\hline +AS.EXE & executable of assembler \\ +PLIST.EXE & lists contents of code files \\ +PBIND.EXE & merges code files \\ +P2HEX.EXE & converts code files to hex files \\ +P2BIN.EXE & converts code files to binary files \\ +AS.MSG & text resources for AS \\ +PLIST.MSG & text resources for PLIST \\ +PBIND.MSG & text resources for PBIND \\ +P2HEX.MSG & text resources for P2HEX \\ +P2BIN.MSG & text resources for P2BIN \\ +TOOLS.MSG & common text resources for all tools \\ +CMDARG.MSG & common text resources for all programs \\ +DECODECMD.MSG & \\ +IOERRS.MSG & \\ +\hline +\hline +{\bf Directory DOC} & \\ +\hline +AS\_DE.DOC & german documentation, ASCII format \\ +AS\_DE.HTML & german documentation, HTML format \\ +AS\_DE.TEX & german documentation, LaTeX format \\ +AS\_EN.DOC & english documentation, ASCII format \\ +AS\_EN.HTML & english documentation, HTML format \\ +AS\_EN.TEX & english documentation, LaTeX format \\ +\hline +\hline +{\bf Directory INCLUDE} & \\ +\hline +BITFUNCS.INC & functions for bit manipulation \\ +CTYPE.INC & functions for classification of \\ + & characters \\ +80C50X.INC & register addresses SAB C50x \\ +80C552.INC & register addresses 80C552 \\ +H8\_3048.INC & register addresses H8/3048 \\ +REG166.INC & addresses and instruction macros 80C166/167 \\ +REG251.INC & addresses and bits 80C251 \\ +REG29K.INC & peripheral addresses AMD 2924x \\ +\hline +\end{tabular}\end{center} +\caption{Standard Contents of a Binary Distribution - Part 1 + \label{TabCommonPackageList1}} +\end{table*} +\begin{table*}[htp] +\begin{center}\begin{tabular}{|l|l|} +\hline +File & Function \\ +\hline +\hline +{\bf Directory INCLUDE} & \\ +\hline +REG53X.INC & register addresses H8/53x \\ +REG683XX.INC & register addresses 68332/68340/68360 \\ +REG7000.INC & register addresses TMS70Cxx \\ +REG78K0.INC & register addresses 78K0 \\ 
+REG96.INC & register addresses MCS-96 \\ +REGACE.INC & register addresses ACE \\ +REGAVR.INC & register and bit addresses AVR family \\ +REGCOP8.INC & register addresses COP8 \\ +REGHC12.INC & register addresses 68HC12 \\ +REGM16C.INC & register addresses Mitsubishi M16C \\ +REGMSP.INC & register addresses TI MSP430 \\ +REGST9.INC & register and macro definitions ST9 \\ +REGZ380.INC & register addresses Z380 \\ +STDDEF04.INC & register addresses 6804 \\ +STDDEF16.INC & instruction macros and register addresses \\ + & PIC16C5x \\ +STDDEF17.INC & register addresses PIC17C4x \\ +STDDEF18.INC & register addresses PIC16C8x \\ +STDDEF2X.INC & register addresses TMS3202x \\ +STDDEF37.INC & register and bit addresses TMS370xxx \\ +STDDEF3X.INC & peripheral addresses TMS320C3x \\ +STDDEF47.INC & instruction macros TLCS-47 \\ +STDDEF51.INC & definition of SFRs and bits for \\ + & 8051/8052/80515 \\ +STDDEF56K.INC & register addresses DSP56000 \\ +STDDEF5X.INC & peripheral addresses TMS320C5x \\ +STDDEF60.INC & instruction macros and register addresses \\ + & PowerPC \\ +STDDEF62.INC & register addresses and macros ST6 \\ +STDDEF75.INC & register addresses 75K0 \\ +\hline +\end{tabular}\end{center} +\caption{Standard Contents of a Binary Distribution - Part 2 + \label{TabCommonPackageList2}} +\end{table*} +\begin{table*}[htp] +\begin{center}\begin{tabular}{|l|l|} +\hline +File & Function \\ +\hline +\hline +{\bf Directory INCLUDE} & \\ +\hline +STDDEF87.INC & register and memory addresses TLCS-870 \\ +STDDEF90.INC & register and memory addresses TLCS-90 \\ +STDDEF96.INC & register and memory addresses TLCS-900 \\ +STDDEFXA.INC & SFR and bit addresses Philips XA \\ +STDDEFZ8.INC & register addresses Z8 family \\ +\hline +\hline +{\bf Directory LIB} & \\ +\hline +\hline +{\bf Directory MAN} & \\ +\hline +ASL.1 & quick reference for AS \\ +PLIST.1 & quick reference for PLIST \\ +PBIND.1 & quick reference for PBIND \\ +P2HEX.1 & quick reference for P2HEX \\ +P2BIN.1 & quick 
reference for P2BIN \\ +\hline +\end{tabular}\end{center} +\caption{Standard Contents of a Binary Distribution - Part 3 + \label{TabCommonPackageList3}} +\end{table*} + + +Depending on the platform, a binary distribution however may contain more +files to allow operation, like files necessary for DOS extenders. In case +of the DOS DPMI version \marginpar{{\em DPMI}}, the extensions listed in +table \ref{TabDPMIPackageList} result. Just to mention it: it is +perfectly O.K. to replace the tools with their counterparts from a DOS +binary distribution; on the one hand, they execute significantly faster +without the extender's overhead, and on the other hand, they do not need +the extended memory provided by the extender. + +\begin{table*}[htp] +\begin{center}\begin{tabular}{|l|l|} +\hline +File & Function \\ +\hline +\hline +{\bf Directory BIN} & \\ +\hline +DPMI16BI.OVL & DPMI server for the assembler \\ +RTM.EXE & runtime module of the extender \\ +\hline +\end{tabular}\end{center} +\caption{Additional Files in a DPMI Binary Distribution + \label{TabDPMIPackageList}} +\end{table*} + +An OS/2 binary distribution \marginpar{{\em OS/2}} contains in addition to +the base files a set of DLLs belonging to the runtime environment of the +emx compiler used to build AS (table \ref{TabOS2PackageList}). In case +you already have these DLLs (or newer versions of them), you may delete +these and use your own ones instead. 
+ +\begin{table*}[htp] +\begin{center}\begin{tabular}{|l|l|} +\hline +File & function \\ +\hline +\hline +{\bf Directory BIN} & \\ +\hline +EMX.DLL & runtime libraries for AS and \\ +EMXIO.DLL & its tools \\ +EMXLIBC.DLL & \\ +EMXWRAP.DLL & \\ +\hline +\end{tabular}\end{center} +\caption{Additional Files in an OS/2 binary distribution + \label{TabOS2PackageList}} +\end{table*} + +%%--------------------------------------------------------------------------- + +\section{Installation} + +There is no need for a \marginpar{{\em DOS}} special installation prior to +usage of AS. It is sufficient to unpack the archive in a fitting place +and to add a few minor settings. For example, this is an installation a +user used to UNIX-like operating systems might choose: + +Create a directory \verb!c:\as! (I will assume in the following that +you are going to install AS on drive C), change to this directory and +unpack the archive, keeping the path names stored in the archive (when +using PKUNZIP, the command line option \verb!-d! is necessary for that). +You now should have the following directory tree: +\begin{verbatim} +c:\as +c:\as\bin +c:\as\include +c:\as\lib +c:\as\man +c:\as\doc +c:\as\demos +\end{verbatim} +Now, append the directory \verb!c:\as\bin! to the \tty{PATH} statement in +your \tty{AUTOEXEC.BAT}, which allows the system to find AS and its tools. +With your favourite text editor, create a file named \tty{AS.RC} in the +\tty{lib} directory with the following contents: +\begin{verbatim} +-i c:\as\include +\end{verbatim} +This so-called {\em key file} tells AS where to search for its include +files. The following statement must be added to your \tty{AUTOEXEC.BAT} +to tell AS to read this file: +\begin{verbatim} +set ASCMD=@c:\as\lib\as.rc +\end{verbatim} +There are many more things you can preset via the key file; they are +listed in the following section. 
+ +The installation of the DPMI version \marginpar{{\em DPMI}} should +principally take the same course as for the pure DOS version; as soon as +the PATH contains the {\tt bin} directory, the DOS extender's files will +be found automatically and you should not notice anything of this +mechanism (except for the longer startup time...). When working on an +80286-based computer, it is theoretically possible that you get confronted +with the following message upon the first start: +\begin{verbatim} + machine not in database (run DPMIINST) +\end{verbatim} +Since the DPMIINST tool is not any more included in newer versions of +Borland's DOS extender, I suppose that this is not an item any more...in +case you run into this, contact me! + +The installation of the OS/2 version \marginpar{{\em OS/2}} can generally +be done just like for the DOS version, with the addition that the DLLs +have to be made visible for the operating system. In case you do not want +to extend the {\tt LIBPATH} entry in your {\tt CONFIG.SYS}, it is of +course also valid to move the DLLs into a directory already listed in {\tt +LIBPATH}. + +As already mentioned, the installation instructions in this section limit +themselves to binary distributions. Since an installation under Unix +\marginpar{{\em UNIX}} is currently always a source-based installation, the +only hint I can give here is a reference to appendix \ref{ChapSource}. + +%%--------------------------------------------------------------------------- + +\section{Start-Up Command, Parameters} +\label{SectCallConvention} + +AS is a command line driven program, i.e. all parameters and file +options are to be given in the command line. + +A couple of message files belong to AS (recognizable by their suffix {\tt +MSG}), which AS accesses to dynamically load the messages appropriate for the +national language. 
AS searches the following directories for these files: +\begin{itemize} +\item{the current directory;} +\item{the EXE-file's directory;} +\item{the directory named in the {\tt AS\_MSGPATH} environment variable, + or alternatively the directories listed in the {\tt PATH} environment + variable;} +\item{the directory compiled into AS via the {\tt LIBDIR} macro.} +\end{itemize} +These files are {\em indispensable} for a proper operation of AS, i.e. AS +will terminate immediately if these files are not found. + +The language selection (currently only German and English) is based on the +{\tt COUNTRY} setting under DOS and OS/2 respectively on the {\tt LANG} +environment variable under Unix. + +In order to fulfill \marginpar{{\em DOS}} AS's memory requirements under +DOS, the various code generator modules of the DOS version were moved into +an overlay which is part of the EXE file. A separate OVR file like in +earlier versions of AS therefore does not exist any more, AS will however +still attempt to reduce the overlaying delays by using any +available EMS or XMS memory. In case this results in +trouble, you may suppress usage of EMS or XMS by setting the environment +variable \tty{USEXMS} or \tty{USEEMS} to \tty{n}. E.g., it is possible to +suppress the using of XMS by the command: +\begin{verbatim} + SET USEXMS=n +\end{verbatim} +Since AS performs all in- and output via the operating system (and +therefore it should run also on not 100\% compatible DOS-PC's) and +needs some basic display control, it emits ANSI control sequences +during the assembly. +In case you \marginpar{{\em DOS/}} should see strange characters in the +messages displayed by AS, your \tty{CONFIG.SYS} is obviously lacking a +line like this: +\begin{verbatim} + device=ansi.sys +\end{verbatim} +but the further \marginpar{{\em DPMI}} functions of AS will not be +influenced hereby.
Alternatively you are able to suppress the output of +ANSI sequences completely by setting the environment variable +\tty{USEANSI} to \tty{n}. + +The DOS extender of the DPMI version \marginpar{{\em DPMI}} can be +influenced in its memory allocation strategies by a couple of environment +variables; if you need to know their settings, you may look them up in the +file \tty{DPMIUSER.DOC}. ASX is additionally able to extend the available +memory by a swap file. To do this, set up an environment variable +\tty{ASXSWAP} in the following way: +\begin{verbatim} + SET ASXSWAP=<size>[,file name] +\end{verbatim} +The size specification has to be done in megabytes and \bb{has} to be done. +The file name in contrast is optional; if it is missing, the file is +named \tty{ASX.TMP} and placed in the current directory. In any case, the +swap file is deleted after program end. + +The command line parameters can roughly be divided into three categories: +switches, key file references (see below) and file specifications. +Parameters of these two categories may be arbitrarily mixed in the command +line. The assembler evaluates at first all parameters and then assembles +the specified files. From this follow two things: +\begin{itemize} +\item{the specified switches affect all specified source files. If + several source files shall be assembled with different switches, + this has to be done in separate runs.} +\item{it is possible to assemble more than one file in one shot and to + bring it to the top, it is allowed that the file specs contain + wildcards.} +\end{itemize} +Parameter switches are recognized by AS by starting with +a slash (/) or hyphen (-). There are switches that are only one +character long and additionally switches composed of a whole word. +Whenever AS cannot interpret a switch as a whole word, it tries to +interpret every letter as an individual switch.
For example, if you +write +\begin{verbatim} + -queit +\end{verbatim} +instead of +\begin{verbatim} + -quiet +\end{verbatim} +AS will take the letters \tty{q, u, e, i}, and \tty{t} as individual +switches. Multiple-letter switches additionally have the difference to +single-letter switches that AS will accept an arbitrary mixture of upper +and lower casing, whereas single-letter switches may have a different +meaning depending on whether upper or lower case is used. + +At the moment, the following switches are defined: +\ttindex{SHARED} +\begin{itemize} +\item{\tty{l}: sends assembler listing to console terminal (mostly screen). + In case several passes have to be done, the listing of all + passes will be sent to the console (in contrast to the next + option).} +\item{\tty{L}: writes assembler listing into a file. The list file will get + the same name as the source file, only the extension is + replaced by \tty{LST}.} +\item{\tty{o}: Sets the new name of the code file generated by AS. If this + option is used multiple times, the names will be assigned, one + after the other, to the source files which have to be + assembled. A negation (see below) of this option in + connection with a name erases this name from the list. A + negation without a name erases the whole list.} +\item{\tty{SHAREOUT}: ditto for a SHARE file eventually to be created.} +\item{\tty{c}: SHARED-variables will be written in a format which permits + an easy integration into a C-source file. The extension of + the file is \tty{H}.} +\item{\tty{p}: SHARED-variables will be written in a format which permits + easy integration into the CONST-block of a Pascal program. + The extension of the file is \tty{INC}.} +\item{\tty{a}: SHARED-variables will be written in a format which permits + easy integration into an assembler source file. The + extension of the file is \tty{INC}.} +\end{itemize} +Concerning effect and function of the SHARED-symbols please see +chapters \ref{ChapShareMain} resp.
\ref{ChapShareOrder}. +\begin{itemize} +\item{\tty{g}: This switch instructs AS to create an additional file that + contains debug information for the program. Allowed formats are the + AS-specific \tty{MAP} format, a \tty{NoICE}-compatible command file, + and the \tty{Atmel} format used by the AVR tools. The information + stored in the MAP format is comprised of a symbol table and a table + describing the assignment of source lines to machine addresses. A + more detailed description of the MAP format can be found in section + \ref{SectDebugFormat}. The file's extension is \tty{MAP}, \tty{NOI}, + resp. \tty{OBJ}, depending on the chosen format. If no explicit + format specification is done, the MAP format is chosen.} +\item{\tty{w}: suppress issue of warnings;} +\item{\tty{E [file]}: error messages and warnings produced by AS will be + redirected to a file. Instead of a file, the 5 standard + handles (STDIN..STDPRN) can also be specified as + \tty{!0} to \tty{!4} . Default is \tty{!2}, meaning STDERR. If the + file option is left out, the name of the error file + is the same as of the source file, but with the + extension \tty{LOG}.} +\item{\tty{q}: This switch suppresses all messages of AS, the exceptions are + error messages and outputs which are forced from the + source file. The time needed for assembly is slightly reduced + hereby and if you call AS from a shell there is no redirection + required. The disadvantage is that you may ''stay in the dark'' + for several minutes ... It is valid to write \tty{quiet} instead + of \tty{q}.} +\item{\tty{h}: write hexadecimal numbers in lowercase instead of capital + letters. This option is primarily a question of personal + taste.} +\item{\tty{i $<$path list$>$}: issues a list of directories where the + assembler shall automatically search for include + files, in case it didn't find a file in the + current directory.
The different directories + have to be separated by semicolons.} +\item{\tty{u}: calculate a list of areas which are occupied in the segments. + This option is effective only in case a listing is + produced. This option requires considerable additional + memory and computing performance. In normal operation it + should be switched off.} +\item{\tty{C}: generates a list of cross references. It lists which (global) + symbols are used in files and lines. This list will also be + generated only in case a listing is produced. This option + occupies, too, additional memory capacity during assembly.} +\item{\tty{s}: issues a list of all sections (see chapter + \ref{ChapLocSyms}). The nesting is indicated by indentations + (Pascal like).} +\item{\tty{t}: by means of this switch it is possible to separate single + components of the standard issued assembler-listing. The assignment + of bits to parts can be found in the next section, where the exact + format of the assembly listing is explained.} +\item{\tty{D}: defines symbols. The symbols which are specified behind this + option and separated by commas are written to the + global symbol table before starting the assembly. As default + these symbols are written as integer numbers with the + value TRUE, by means of an appended equal sign, however, you + can select other values. The expression following the equals + sign may include operators or internal functions, but \bb{not} + any further symbols, even if these should have been defined + before in the list! Together with the commands for + conditional assembly (see there) you may produce different + program versions out of one source file by command line + inputs.} +\item{\tty{A}: stores the list of global symbols in another, more compact + form. Use this option if the assembler crashes with a stack + overflow because of too long symbol tables. 
Sometimes this + option can increase the processing speed of the assembler, but + this depends on the sources.} +\item{\tty{x}: Sets the level of detail for error messages. The level + is increased resp. decreased by one each time this option is given. + While on level 0 (default) only the error message itself is printed, + an extended message is added beginning at level 1 that should + simplify the identification of the error's cause. Appendix + \ref{ChapErrMess} lists which error messages carry which extended + messages. At level 2 (maximum), the source line containing the + error is additionally printed.} +\item{\tty{n}: If this option is set, the error messages will be issued + additionally with their error number (see appendix + \ref{ChapErrMess}). This is primarily intended for use with shells + or IDE's to make the identification of errors easier by those + numbers.} +\item{\tty{U}: This option switches AS to the case-sensitive mode, i.e. + upper and lower case in the names of symbols, sections, macros, + character sets, and user-defined functions will be distinguished. + This is not the case by default.} +\item{\tty{P}: Instructs AS to write the source text processed by macro + processor and conditional assembly into a file. Additional + blank and pure comment lines are missing in this file. The + extension of this file is \tty{I}.} +\item{\tty{M}: If this switch is given, AS generates a file, that contains + definitions of macros defined in the source file that did not + use the \tty{NOEXPORT} option. This new file has the same name as + the source file, only the extension is modified into \tty{MAC}.} +\item{\tty{G}: this switch defines whether AS should produce code or not. + If switched off, the processing will be stopped after the macro + processor. This switch is activated by default (logically, + otherwise you would not get a code file). 
This switch can be + used in conjunction with the \tty{P} switch, if only the macro + processor of AS shall be used.} +\item{\tty{r [n]}: issue warnings if situations occur that force a further + pass. This information can be used to reduce the number of + passes. You may optionally specify the number of the + first pass where issuing of such messages shall start. + Without this argument, warnings will come starting with + the first pass. Be prepared for a bunch of messages!!} +\item{\tty{cpu $<$name$>$}: this switch allows to set the target processor + AS shall generate code for, in case the source file does not contain + a {\tt CPU} instruction and is not 68008 code.} +\item{\tty{alias $<$new$>$=$<$old$>$}: + defines the processor type \tty{$<$new$>$} to be an alias for the + type \tty{$<$old$>$}. See section \ref{SectAlias} for the sense of + processor aliases.} +\end{itemize} +As long as switches require no arguments and their concatenation does +not result in a multi-letter switch, it is possible to specify several +switches at one time, as in the following example : +\begin{verbatim} + as test*.asm firstprog -cl /i c:\as\8051\include +\end{verbatim} +All files \tty{TEST*.ASM} as well as the file \tty{FIRSTPROG.ASM} will be +assembled, whereby listings of all files are displayed on the +console terminal. Additional sharefiles will be generated in the C- +format. The assembler should search for additional include files +in the directory \verb!C:\AS\8051\INCLUDE!. + +This example shows that the assembler assumes \tty{ASM} as the default +extension for source files. 
+ +A bit of caution should be applied when using switches that have +optional arguments: if a file specification immediately follows such +a switch without the optional argument, AS will try to interpret the +file specification as argument - which of course fails: +\begin{verbatim} + as -g test.asm +\end{verbatim} +The solution in this case would either be to move the -g option to the +end or to specify an explicit MAP argument. + + +Apart from specifying options in the command line, permanently +needed options may be placed in the environment variable \tty{ASCMD}. For +example, if someone always wants to have assembly listings and has a +fixed directory for include files, he can save a lot of typing with +the following command: +\begin{verbatim} + set ascmd=-L -i c:\as\8051\include +\end{verbatim} +The environment options are processed before the command line, +so options in the command line can override contradicting ones in the +environment variable. + +In the case of very long path names, space in the \tty{ASCMD} variable may +become a problem. For such cases a key file may be the alternative, +in which the options can be written in the same way as in the command +line or the \tty{ASCMD}-variable. But this file may contain several lines +each with a maximum length of 255 characters. In a key file it is +important, that for options which require an argument, switches and +argument have to be written in the \bb{same} line. AS gets informed of +the name of the key file by a \tty{@} aheaded in the \tty{ASCMD} variable, +e.g. +\begin{verbatim} +set ASCMD=@c:\as\as.key +\end{verbatim} +In order to neutralize options in the \tty{ASCMD} variable (or in the +key file), prefix the option with a plus sign. For example, if you +do not want to generate an assembly listing in an individual case, +the option can be retracted in this way: +\begin{verbatim} +as +L +\end{verbatim} +Naturally it is not consequently logical to deny an option by a +plus sign....
UNIX soit qui mal y pense. + +References to key files may not only come from the {\tt ASCMD} variable, +but also directly from the command line. Similarly to the {\tt ASCMD} +variable, prepend the file's name with a \tty{@} character: +\begin{verbatim} + as @<file> .... +\end{verbatim} +The options read from a key file in this situation are processed as if +they had been written out in the command line in place of the reference, +{\em not} like the key file referenced by the {\tt ASCMD} variable that is +processed prior to the command line options. + +Referencing a key file from a key file itself is not allowed and will be +answered with an error message by AS. + +In case that you like to start AS from another program or a shell and +this shell hands over only lower-case or capital letters in the +command line, the following workaround exists: if a tilde (\verb!~!) is put +in front of an option letter, the following letter is always +interpreted as a lower-case letter. Similarly a \tty{\#} demands the +interpretation as a capital letter. For example, the following +transformations result for: +\begin{verbatim} + /~I ---> /i + -#u ---> -U +\end{verbatim} +In dependence of the assembly's outcome, the assembler ends with +the following return codes: +\begin{description} +\item[0]{error free run, at maximum warnings occurred} +\item[1]{The assembler displayed only its command-line parameters and + terminated immediately afterwards.} +\item[2]{Errors occurred during assembly, no code file has been produced.} +\item[3]{A fatal error occurred which led to immediate termination of the run.} +\item[4]{An error occurred already while starting the assembler.
+ This may be a parameter error or a faulty overlay file.} +\item[255]{An internal error occurred during initialization that should not + occur in any case...reboot, try again, and contact me if the + problem is reproducible!} +\end{description} +Similar to UNIX, OS/2 \marginpar{{\em OS/2}} extends an application's data +segment on demand when the application really needs the memory. +Therefore, an output like +\begin{verbatim} + 511 KByte available memory +\end{verbatim} +does not indicate a shortly to come system crash due to memory lack, +it simply shows the distance to the limit when OS/2 will push up the +data segment's size again... + +As there is no compatible way in C \marginpar{{\em UNIX}} under different +operating systems to find out the amount of available memory resp. stack, +both lines are missing completely from the statistics the C version prints. + +%%--------------------------------------------------------------------------- + +\section{Format of the Input Files} +\label{AttrTypes} + +Like most assemblers, AS expects exactly one instruction per line +(blank lines are naturally allowed as well). The lines must not be +longer than 255 characters, additional characters are discarded. + +A single line has the following format: +\begin{verbatim} +[label[:]] <mnemonic>[.attr] [param[,param..]] [;comment] +\end{verbatim} +The colon for the label is optional, in case the label starts in the +first column (the consequence is that a mnemonic must not start in +column 1). It is necessary to set the colon in case the label does +not start in the first column so that AS is able to distinguish it +from a mnemonic. In the latter case, there must be at least one space +between colon and mnemonic if the processor belongs to a family that +supports an attribute that denotes an instruction format and is +separated from the mnemonic by a colon.
This restriction is +necessary to avoid ambiguities: a distinction between a mnemonic with +format and a label with mnemonic would otherwise be impossible. + +Some signal processor families from Texas Instruments optionally use a +double line (\verb!||!) in place of the label to signify the parallel +execution with the previous instruction(s). If these two assembler +instructions become a single instruction word at machine level (C3x), an +additional label in front of the second instruction of course does not +make sense and is not allowed. The situation is different for the C6x +with its instruction packets of variable length: If someone wants to jump +into the middle of an instruction packet (bad style, if you ask me...), he +has to place the necessary label {\em before} into a separate line. The +same is valid for conditions, which however may be combined with the +double line in a single source line. + +The attribute is used by a couple of processors to specify variations or +different codings of a certain instruction. The most prominent usage of +the attribute is the specification of the operand size, for example in +the case of the 680x0 family (table \ref{TabAttrs}).
+\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|} +\hline +attribute & arithmetic-logic instruction & jump instruction\\ +\hline +\hline +B & byte (8 bits) & --------- \\ +W & word (16 bits) & --------- \\ +L & long word (32 bits) & 16-bit-displacement \\ +Q & quad word (64 bits) & --------- \\ +S & single precision (32 bits) & 8-bit-displacement \\ +D & double precision (64 bits) & --------- \\ +X & extended precision (80/96 bits) & 32-bit-displacement \\ +P & decimal floating point (80/96 bits) & --------- \\ +\hline +\end{tabular}\end{center} +\caption{Allowed Attributes (Example 680x0) \label{TabAttrs}} +\end{table*} +\par +Since this manual is not also meant as a user's manual for the processor +families supported by AS, this is unfortunately not the place to enumerate +all possible attributes for all families. It should however be mentioned +that in general, not all instructions of a given instruction set allow all +attributes and that the omission of an attribute generally leads to the +usage of the ''natural'' operand size of a processor family. For more +thorough studies, consult a reasonable programmer's manual, e.g. +\cite{Williams} for the 68K's. + +In the case of TLCS-9000, H8/500, and M16(C), the attribute serves +both as an operand size specifier (if it is not obvious from the +operands) and as a description of the instruction format to be used. +A colon has to be used to separate the format from the operand size, +e.g. like this: +\begin{verbatim} + add.w:g rw10,rw8 +\end{verbatim} +This example does not show that there may be a format specification +without an operand size. In contrast, if an operand size is used +without a format specification, AS will automatically use the +shortest possible format. The allowed formats and operand sizes +again depend on the machine instruction and may be looked up e.g. in +\cite{Tosh900}, \cite{HitH8_5}, \cite{MitM16}, resp. \cite{MitM16C}. 
+ +The number of instruction parameters depends on the mnemonic and is +principally located between 0 and 20. The separation of the parameters +from each other is to be performed only by commas (exception: DSP56xxx, +its parallel data transfers are separated with blanks). Commas that +are included in brackets or quotes, of course, are not taken into +consideration. + +Instead of a comment at the end, the whole line can consist of +comment if it starts in the first column with a semicolon. + +To separate the individual components you may also use tabulators +instead of spaces. + +%%--------------------------------------------------------------------------- + +\section{Format of the Listing} + +The listing produced by AS using the command line options l or L is +roughly divisible into the following parts : +\begin{enumerate} +\item{issue of the source code assembled;} +\item{symbol list;} +\item{usage list;} +\item{cross reference list.} +\end{enumerate} +The two last ones are only generated if they have been demanded by +additional command line options. + +In the first part, AS lists the complete contents of all source files +including the produced code. A line of this listing has the following +form: +\begin{verbatim} +[<n>] <line>/<address> <code> <source>
+\end{verbatim} +In the field \tty{n}, AS displays the include nesting level. The main file +(the file where assembly was started) has the depth 0, an included +file from there has depth 1 etc.. Depth 0 is not displayed. + +In the field \tty{line}, the source line number of the referenced file is +issued. The first line of a file has the number 1. The address at +which the code generated from this line is written follows after the +slash in the field \tty{address}. + +The code produced is written behind \tty{address} in the field \tty{code}, +in hexadecimal notation. Depending on the processor type and actual +segment the values are formatted either as bytes or 16/32-bit-words. +If more code is generated than the field can take, additional lines +will be generated, in which case only this field is used. + +Finally, in the field \tty{source}, the line of the source file is issued in +its original form. + +The symbol table was designed in a way that it can be displayed on an +80-column display whenever possible. For symbols of ''normal length'', +a double column output is used. If symbols exceed (with their name +and value) the limit of 40 columns (characters), they will be issued +in a separate line. The output is done in alphabetical order. +Symbols that have been defined but were never used are marked with a +star (*) as prefix. + +The parts mentioned so far as well as the list of all macros/functions +defined can be selectively masked out from the listing. +This can be done by the already mentioned command line switch \tty{-t}. +There is an internal byte inside AS whose bits represent which parts +are to be written. The assignment of bits to parts of the listing is +listed in table \ref{TabTBits}. 
+\par +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|} +\hline +bit & part \\ +\hline +\hline +0 & source file(s) + produced code \\ +1 & symbol table \\ +2 & macro list \\ +3 & function list \\ +4 & line numbering \\ +5 & register symbol list \\ +7 & character set table \\ +\hline +\end{tabular}\end{center} +\caption{Assignment of Bits to Listing Components\label{TabTBits}} +\end{table*} +All bits are set to 1 by default, when using the switch +\begin{verbatim} +-t <mask> +\end{verbatim} +Bits set in \tty{$<$mask$>$} are cleared, so that the respective listing +parts are suppressed. Accordingly it is possible to switch on single +parts again with a plus sign, in case you had switched off too much +with the \tty{ASCMD} variable... If someone wants to have, for example, +only the symbol table, it is enough to write: +\begin{verbatim} +-t 2 +\end{verbatim} +The usage list issues the occupied areas hexadecimally for every +single segment. If the area has only one address, only this is written, +otherwise the first and last address. + +The cross reference list issues any defined symbol in alphabetical +order and has the following form: +\begin{verbatim} + symbol <symbol name> (=<value>,<file>/<line>): + file <file 1>: + <n1>[(m1)] ..... <nk>[(mk)] + . + . + file <file l>: + <n1>[(m1)] ..... <nk>[(mk)] +\end{verbatim} +The cross reference list lists for every symbol in which files and lines +it has been used. If a symbol was used several times in the same line, +this would be indicated by a number in brackets behind the line number. +If a symbol was never used, it would not appear in the list; The same is +true for a file that does not contain any references for the symbol in +question. + +\bb{CAUTION!} AS can only print the listing correctly if it was +previously informed about the output media's page length and width! +This has to be done with the \tty{PAGE} instruction (see there). The +preset default is a length of 60 lines and an unlimited line width.
+ +%%--------------------------------------------------------------------------- + +\section{Symbol Conventions} +\label{SectSymConv} + +Symbols are allowed to be up to 255 characters long (as hinted already +in the introduction) and are being distinguished on the whole +length, but the symbol names have to meet some conventions: + +Symbol names are allowed to consist of a random combination of +letters, digits, underlines and dots, whereby the first character must +not be a digit. The dot is only allowed to meet the MCS-51 notation of +register bits and should - as far as possible - not be used in own symbol +names. To separate symbol names in any case the underline (\tty{\_}) and not +the dot (\tty{.}) should be used . + +AS is by default not case-sensitive, i.e. it does not matter whether +one uses upper or lower case characters. The command line switch \tty{U} +however allows to switch AS into a mode where upper and lower case +makes a difference. The predefined symbol \tty{CASESENSITIVE} signifies +whether AS has been switched to this mode: TRUE means case-sensitiveness, +and FALSE its absence. + +Table \ref{TabPredefined} shows the most important symbols which are +predefined by AS. +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|} +\hline +name & meaning \\ +\hline +\hline +TRUE & logically ''true'' \\ +FALSE & logically ''false'' \\ +CONSTPI & Pi (3.1415.....) \\ +VERSION & version of AS in BCD-coding, \\ + & e.g. 1331 hex for version 1.33p1 \\ +ARCHITECTURE & target platform AS was compiled for, in \\ + & the style processor-manufacturer-operating \\ + & system \\ +DATE & date and \\ +TIME & time of the assembly (start) \\ +MOMCPU & current target CPU \\ + & (see the CPU instruction) \\ +MOMFILE & current source file \\ +MOMLINE & line number in source file \\ +MOMPASS & number of the currently running pass \\ +MOMSECTION & name of the current section \\ + & or an empty string \\ +\verb!*!, \$ resp. 
PC & current value of program counter \\ +\hline +\end{tabular}\end{center} +\caption{Predefined Symbols\label{TabPredefined}} +\end{table*} +\bb{CAUTION!} While it does not matter in case-insensitive mode which +combination of upper and lower case to use to reference predefined +symbols, one has to use exactly the version given above (only upper +case) when AS is in case-sensitive mode! + +Additionally some pseudo instructions define symbols that reflect the +value that has been set with these instructions. Their descriptions +are explained at the individual commands belonging to them. + +A hidden feature (that has to be used with care) is that symbol names +may be assembled from the contents of string symbols. This can be +achieved by framing the string symbol's name with braces and +inserting it into the new symbol's name. This allows for example to +define a symbol's name based on the value of another symbol: +\begin{verbatim} +cnt set cnt+1 +temp equ "\{CNT}" + jnz skip{temp} + . + . +skip{temp}: nop +\end{verbatim} +\bb{CAUTION:} The programmer has to assure that only valid symbol names +are generated! + +A complete list of all symbols predefined by AS can be found in +appendix \ref{AppInternSyms}. + +Apart from its value, every symbol also owns a marker which signifies to +which {\em segment} it belongs. Such a distinction is mainly needed for +processors that have more than one address space. The additional +information allows AS to issue a warning when a wrong instruction is used +to access a symbol from a certain address space. A segment attribute is +automatically added to a symbol when it gets defined via a label or a +special instruction like \tty{BIT}; a symbol defined via the ''allround +instructions'' \tty{SET} resp. \tty{EQU} is however ''typeless'', i.e. its +usage will never trigger warnings. A symbol's segment attribute may be +queried via the built-in function \tty{SYMTYPE}, e.g.: +\begin{verbatim} +Label: + . + .
+Attr equ symtype(Label) ; results in 1 +\end{verbatim} +The individual segment types have the assigned numbers listed in table +\ref{TabSegNums}. Register symbols which do not really fit into the order +of normal symbols are explained in section \ref{SectRegSyms}. The +\tty{SYMTYPE} function delivers -1 as result when called with an undefined +symbol as argument. +\begin{table}[htb] +\begin{center} +\begin{tabular}{|l|c|} +\hline +segment & return value \\ +\hline +$<$none$>$ & 0 \\ +CODE & 1 \\ +DATA & 2 \\ +IDATA & 3 \\ +XDATA & 4 \\ +YDATA & 5 \\ +BITDATA & 6 \\ +IO & 7 \\ +REG & 8 \\ +ROMDATA & 9 \\ +$<$register symbol$>$ & 128 \\ +\hline +\end{tabular} +\end{center} +\caption{return values of the \tty{SYMTYPE} function\label{TabSegNums}} +\end{table} + +%%--------------------------------------------------------------------------- + +\section{Formula Expressions} + +In most places where the assembler expects numeric inputs, it is +possible to specify not only simple symbols or constants, but also +complete formula expressions. The components of these formula +expressions can be either single symbols or constants. Constants may be +either integer, floating point, or string constants. + +\subsection{Integer Constants} +\label{SectIntConsts} + +Integer constants describe non-fractional numbers. They may either be +written as a sequence of digits or as a sequence of characters enclosed in +{\em single} quotation marks. In case they are written as a sequence of +digits, this may be done in different numbering systems (table +\ref{TabSystems}).
+\par +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|c|c|c|} +\hline + & Intel mode & Motorola mode & C mode \\ + & (Intel, Zilog, & (Rockwell, Motorola, & (PowerPC, \\ + & Thomson Texas, & Microchip, Thomson, & AMD 29K, \\ + & Toshiba, NEC, & Hitachi, Atmel) & National, \\ + & Siemens, Philips, & & Symbios) \\ + & Fujitsu, Fairchild) & & \\ +\hline +\hline +decimal & direct & direct & direct \\ +hexadecimal & followed by H & preceded by \$ & preceded by 0x \\ +binary & followed by B & preceded by \% & preceded by 0b \\ +octal & followed by O & preceded by @ & preceded by 0 \\ +\hline +\end{tabular}\end{center} +\caption{Possible Numbering Systems\label{TabSystems}} +\end{table*} +In case the numbering system has not been explicitly stated by adding the +special control characters listed in the table, AS assumes the base given +with the {\tt RADIX} statement (which has itself 10 as default). This +statement allows to set up 'unusual' numbering systems, i.e. other than +2, 8, 10, or 16. + +Valid digits are numbers from 0 to 9 and letters from A to Z (value 10 to +35) up to the numbering system's base minus one. The usage of letters in +integer constants however brings along some ambiguities since symbol names +also are sequences of numbers and letters: a symbol name must not start +with a character from 0 to 9. This means that an integer constant which +is not clearly marked as such with a special prefix character may never +begin with a letter. One has to add an additional, otherwise superfluous +zero in front in such cases. The most prominent case is the writing of +hexadecimal constants in Intel mode: If the leftmost digit is between A +and F, the trailing H doesn't help anything, an additional 0 has to be +prefixed (e.g. 0F0H instead of F0H). The Motorola and C syntaxes which +both mark the numbering system at the front of a constant do not have this +problem (\ii{hehehe..}).
+ +Quite tricky is furthermore that the higher the default numbering system +set via {\tt RADIX} becomes, the more letters used to denote numbering +systems in Intel and C syntax become 'eaten'. For example, you cannot +write binary constants anymore after a {\tt RADIX 16}, and starting at +{\tt RADIX 18}, the Intel syntax even doesn't allow to write hexadecimal +constants any more. Therefore {\bf CAUTION!} + +With the help of the \tty{RELAXED} instruction (see section \ref{SectRELAXED}), +the strict assignment of a syntax to a certain target processor can be +removed. The result is that an arbitrary syntax may be used (losing +compatibility to standard assemblers). This option is however turned off +by default. + +Integer constants may also be written as ASCII values, like in +the following examples: +\begin{verbatim} +'A' ==$41 +'AB' ==$4142 +'ABCD' ==$41424344 +\end{verbatim} +It is important to write the characters in {\em single quotes}, to +distinguish them from string constants (discussed somewhat later). + +\subsection{Floating Point Constants} + +Floating point constants are to be written in the usual scientific +notation, which is known in the most general form: +\begin{verbatim} + [-][.post decimal positions][E[-]exponent] +\end{verbatim} +\bb{CAUTION!} The assembler first tries to interpret a constant as an +integer constant and makes a floating-point format try only in case +the first one failed. If someone wants to enforce the evaluation as +a floating point number, this can be done by dummy post decimal +positions, e.g. \tty{2.0} instead of \tty{2}. + +\subsection{String Constants} +\label{SectStringConsts} + +String constants have to be included in {\em double quotation} marks (to +distinguish them from the abovementioned ASCII-integers).
In order to +make it possible to write quotation marks and special characters without +trouble in string constants, an ''escape mechanism'' has been implemented, +which should sound familiar for C programmers: + +The assembler understands a backslash (\verb!\!) with a following decimal +number of three digits maximum in the string as a character with the +according decimal ASCII value. The numerical value may alternatively be +written in hexadecimal or octal notation if it is prefixed with an x resp. +a 0. In case of hexadecimal notation, the maximum number of digits is +limited to 2. For example, it is possible to include an ETX character by +writing {\tt\verb!\!3}. But be careful with the definition of NUL +characters! The C \marginpar{{\em UNIX}} version currently uses C strings +to store strings internally. As C strings use a NUL character for +termination, the usage of NUL characters in strings is currently not +portable! + +Some frequently used control characters can also be reached with the +following abbreviations: +\begin{verbatim} +\b : Backspace \a : Bell \e : Escape +\t : Tabulator \n : Linefeed \r : Carriage Return +\\ : Backslash \' or \H : Apostrophe +\" or \I : Quotation marks +\end{verbatim} +Both upper and lower case characters may be used for the +identification letters. + +By means of this escape character, you can even work formula +expressions into a string, if they are enclosed by braces: e.g. +\begin{verbatim} + message "root of 81 : \{sqrt(81)}" +\end{verbatim} +results in +\begin{verbatim} + root of 81 : 9 +\end{verbatim} +AS chooses with the help of the formula result type the correct +output format, further string constants, however, are to be avoided +in the expression. Otherwise the assembler will get mixed up at the +transformation of capitals into lower case letters. Integer results will +by default be written in hexadecimal notation, which may be changed via +the \tty{OUTRADIX} instruction.
+ +Except for the insertion of formula expressions, you can use this +''escape-mechanism'' as well in ASCII defined integer constants, +like this: +\begin{verbatim} + move.b #'\n',d0 +\end{verbatim} +However, everything has its limits, because the parser with higher +priority, which disassembles a line into op-code and parameters, does +not know what it is actually working with, e.g. here: +\begin{verbatim} + move.l #'\'abc',d0 +\end{verbatim} +After the third apostrophe, it will not find the comma any more, +because it presumes that it is the start of a further character +constant. An error message about a wrong parameter number is the result. +A workaround would be to write e.g., \verb!\h! instead of \verb!\'!. + +\subsection{Evaluation} + +The calculation of intermediary results within formula expressions is +always done with the highest available resolution, i.e. 32 bits for +integer numbers, 80 bits for floating point numbers and 255 characters +for strings. A possible test of value range overflows is done only +on the final result. + +The portable C version \marginpar{{\em UNIX}} only supports floating +point values up to 64 bits (resulting in a maximum value of roughly +$10^{308}$), but in turn features integer lengths of 64 bits on some +platforms. + +\subsection{Operators} + +The assembler provides the operators listed in table \ref{TabOps} for +combination. +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|c|l|c|c|c|c|c|} +\hline +operator & function & \#operands & integer & float & string & rank \\ +\hline +\hline +$<>$ & inequality & 2 & yes & yes & yes & 14 \\ +$>=$ & greater or equal & 2 & yes & yes & yes & 14 \\ +$<=$ & less or equal & 2 & yes & yes & yes & 14 \\ +$<$ & truly smaller & 2 & yes & yes & yes & 14 \\ +$>$ & truly greater & 2 & yes & yes & yes & 14 \\ +$=$ & equality & 2 & yes & yes & yes & 14 \\ +$==$ & alias for $=$ & & & & & \\ + & & & & & & \\ +$!!$ & log. XOR & 2 & yes & no & no & 13 \\ +$||$ & log.
OR & 2 & yes & no & no & 12 \\ +\&\& & log. AND & 2 & yes & no & no & 11 \\ +\verb! ~~ ! & log. NOT & 1 & yes & no & no & 2 \\ + & & & & & & \\ +- & difference & 2 & yes & yes & no & 10 \\ ++ & sum & 2 & yes & yes & yes & 10 \\ +\# & modulo division & 2 & yes & no & no & 9 \\ +/ & quotient & 2 & yes*) & yes & no & 9 \\ +\verb! * ! & product & 2 & yes & yes & no & 9 \\ +\verb! ^ ! & power & 2 & yes & yes & no & 8 \\ + & & & & & & \\ +$!$ & binary XOR & 2 & yes & no & no & 7 \\ +$|$ & binary OR & 2 & yes & no & no & 6 \\ +\& & binary AND & 2 & yes & no & no & 5 \\ +$><$ & mirror of bits & 2 & yes & no & no & 4 \\ +$>>$ & log. shift right & 2 & yes & no & no & 3 \\ +$<<$ & log. shift left & 2 & yes & no & no & 3 \\ +\verb! ~ ! & binary NOT & 1 & yes & no & no & 1 \\ +\hline +\multicolumn{7}{|l|}{*) remainder will be discarded} \\ +\hline +\end{tabular}\end{center} +\caption{Operators Predefined by AS\label{TabOps}} +\end{table*} +''Rank'' is the priority of an operator at the separation of expressions +into subexpressions. The operator with the highest rank will be +evaluated at the very end. The order of evaluation can be defined by +new bracketing. + +The compare operators deliver TRUE in case the condition fits, +and FALSE in case it doesn't. For the logical operators an expression +is TRUE in case it is not 0, otherwise it is FALSE. + +The mirroring of bits probably needs a little bit of explanation: the +operator mirrors the lowest bits in the first operand and leaves the +higher-order bits unchanged. The number of bits which is to be +mirrored is given by the right operand and may be between 1 and 32 . + +A small pitfall is hidden in the binary complement: As the +computation is always done with 32 resp. 64 bits, its application on +e.g. 8-bit masks usually results in values that do not fit into 8-bit +numbers any more due to the leading ones. A binary AND with a +fitting mask is therefore unavoidable!
+ +\subsection{Functions} + +In addition to the operators, the assembler defines another line of +primarily transcendental functions with floating point arguments which are +listed in tables \ref{TabFuncs1} and \ref{TabFuncs2}. +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & meaning & argument & result \\ +\hline +\hline +SQRT & square root & $arg \geq 0$ & floating point \\ + & & & \\ +SIN & sine & $arg \in \rz$ & floating point \\ +COS & cosine & $arg \in \rz$ & floating point \\ +TAN & tangent & $arg \neq (2n+1)*\frac{\pi}{2}$ & floating point \\ +COT & cotangent & $arg \neq n*\pi$ & floating point \\ + & & & \\ +ASIN & inverse sine & $\mid arg \mid \leq 1$ & floating point \\ +ACOS & inverse cosine & $\mid arg \mid \leq 1$ & floating point \\ +ATAN & inverse tangent & $arg \in \rz$ & floating point \\ +ACOT & inverse cotangent & $arg \in \rz$ & floating point \\ + & & & \\ +EXP & exponential function & $arg \in \rz$ & floating point \\ +ALOG & 10 power of argument & $arg \in \rz$ & floating point \\ +ALD & 2 power of argument & $arg \in \rz$ & floating point \\ +SINH & hyp. sine & $arg \in \rz$ & floating point \\ +COSH & hyp. cosine & $arg \in \rz$ & floating point \\ +TANH & hyp. tangent & $arg \in \rz$ & floating point \\ +COTH & hyp. cotangent & $arg \neq 0$ & floating point \\ + & & & \\ +LN & nat. logarithm & $arg > 0$ & floating point \\ +LOG & dec. logarithm & $arg > 0$ & floating point \\ +LD & bin. logarithm & $arg > 0$ & floating point \\ +ASINH & inv. hyp. Sine & $arg \in \rz$ & floating point \\ +ACOSH & inv. hyp. Cosine & $arg \geq 1$ & floating point \\ +ATANH & inv. hyp. Tangent & $arg < 1$ & floating point \\ +ACOTH & inv. hyp. 
Cotangent & $arg > 1$ & floating point \\ + & & & \\ +INT & integer part & $arg \in \rz$ & floating point \\ +\hline +BITCNT & number of one's & integer & integer \\ +FIRSTBIT & lowest 1-bit & integer & integer \\ +\hline +\end{tabular}\end{center} +\caption{Functions Predefined by AS - Part 1 (Integer and + Floating Point Functions) \label{TabFuncs1}} +\end{table*} +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & meaning & argument & result \\ +\hline +\hline +LASTBIT & highest 1-bit & integer & integer \\ +BITPOS & unique 1-bit & integer & integer \\ + & & & \\ +SGN & sign (0/1/-1) & floating point & integer \\ + & & or integer & \\ +ABS & absolute value & integer or & integer or \\ + & & floating point & floating point \\ +TOUPPER & matching capital & integer & integer \\ +TOLOWER & matching lower case & integer & integer \\ + & & & \\ +UPSTRING & changes all & string & string \\ + & characters & & \\ + & into capitals & & \\ + & & & \\ +LOWSTRING & changes all & string & string \\ + & characters & & \\ + & into lower case & & \\ + & & & \\ +STRLEN & returns the length & string & integer \\ + & of a string & & \\ + & & & \\ +SUBSTR & extracts parts of a & string, & string \\ + & string & integer, & \\ + & & integer & \\ +STRSTR & searches a substring & string, & integer \\ + & in a string & string & \\ +VAL & evaluates contents & string & depends on \\ + & as expression & & argument \\ +\hline +\end{tabular}\end{center} +\caption{Functions Predefined by AS - Part 2 (Integer and + String Functions) \label{TabFuncs2}} +\end{table*} +The functions \tty{FIRSTBIT}, \tty{LASTBIT}, and \tty{BITPOS} return -1 as +result if no resp. not exactly one bit is set. \tty{BITPOS} additionally +issues an error message in such a case.
+ +The string function \tty{SUBSTR} expects the source string as first +parameter, the start position as second and the number of characters to be +extracted as third parameter (a 0 means to extract all characters up to +the end). \tty{STRSTR} returns the first occurrence of the second string +within the first one resp. -1 if the search pattern was not found. Both +functions number characters in a string starting at 0! + +If a function expects floating point arguments, this does not mean it +is impossible to write e.g. +\begin{verbatim} + sqr2 equ sqrt(2) +\end{verbatim} +In such cases an automatic type conversion is engaged. In the reverse +case the \tty{INT}-function has to be applied to convert a floating point +number to an integer. When using this function, you have to pay +attention that the result produced always is a signed integer and +therefore has a value range of approximately +/-2.0E9. + +When AS is switched to case-sensitive mode, predefined functions may be +accessed with an arbitrary combination of upper and lower case (in +contrast to predefined symbols). However, in the case of user-defined +functions (see section \ref{SectFUNCTION}), a distinction between upper +and lower case is made. This has e.g. the result that if one defines a +function \tty{Sin}, one can afterwards access this function via \tty{Sin}, but all +other combinations of upper and lower case will lead to the predefined +function. + +For a correct conversion \marginpar{{\em DOS/DPMI}} of lower case letters +into capital letters a DOS version $\geq$ 3.30 is required. + +%%--------------------------------------------------------------------------- + +\section{Forward References and Other Disasters} + +This section is the result of a significant amount of hate on the +(legal) way some people program. This way can lead to trouble in +conjunction with AS in some cases. The section will deal with +so-called 'forward references'.
What makes a forward reference +different from a usual reference? To understand the difference, take +a look at the following programming example (please excuse my bias +for the 68000 family that is also present in the rest of this +manual): +\begin{verbatim} + move.l d0,#10 +loop: move.l d1,(a1) + beq skip + neg.l d1 +skip: move.l (a1+),d1 + dbra d0,loop +\end{verbatim} +If one overlooks the loop body with its branch statement, a program +remains that is extremely simple to assemble: the only reference is +the branch back to the body's beginning, and as an assembler +processes a program from the beginning to the end, the symbol's value +is already known before it is needed the first time. If one has a +program that only contains such backward references, one has the nice +situation that only one pass through the source code is needed to +generate a correct and optimal machine code. Some high level +languages like Pascal with their strict rule that everything has to +be defined before it is used exploit exactly this property to speed +up the compilation. + +Unfortunately, things are not that simple in the case of assembler, +because one sometimes has to jump forward in the code or there are +reasons why one has to move variable definitions behind the code. +For our example, this is the case for the conditional branch that is +used to skip over another instruction. When the assembler hits the +branch instruction in the first pass, it is confronted with the +situation of either leaving blank all instruction fields related to +the target address or offering a value that ''hurts no one'' via the +formula parser (which has to evaluate the address argument). In case +of a ''simple'' assembler that supports only one target architecture +with a relatively small number of instructions to treat, one will +surely prefer the first solution, but the effort for AS with its +dozens of target architectures would have become extremely high.
+Only the second way was possible: If an unknown symbol is detected in +the first pass, the formula parser delivers the program counter's +current value as result! This is the only value suitable to offer an +address to a branch instruction with unknown distance length that +will not lead to errors. This answers also a frequently asked +question why a first-pass listing (it will not be erased e.g. when AS +does not start a second pass due to additional errors) partially +shows wrong addresses in the generated binary code - they are the +result of unresolved forward references. + +The example listed above however uncovers an additional difficulty of +forward references: Depending on the distance of branch instruction +and target in the source code, the branch may be either long or +short. The decision however about the code length - and therefore +about the addresses of following labels - cannot be made in the first +pass due to missing knowledge about the target address. In case the +programmer did not explicitly mark whether a long or short branch +shall be used, genuine 2-pass assemblers like older versions of MASM +from Microsoft ''solve'' the problem by reserving space for the longest +version in the first pass (all label addresses have to be fixed after +the first pass) and filling the remaining space with \tty{NOP}s in the +second pass. AS versions up to 1.37 did the same before I switched +to the multipass principle that removes the strict separation into +two passes and allows an arbitrary number of passes. Said in detail, +the optimal code for the assumed values is generated in the first +pass. In case AS detects that values of symbols changed in the second +pass due to changes in code lengths, simply a third pass is done, and +as the second pass'es new symbol values might again shorten or +lengthen the code, a further pass is not impossible. I have seen +8086 programs that needed 12 passes to get everything correct and +optimal. 
Unfortunately, this mechanism does not allow to specify a +maximum number of passes; I can only advise that the number of passes +goes down when one makes more use of explicit length specifications. + +Especially for large programs, another situation might arise: the +position of a forward directed branch has moved so much in the second +pass relative to the first pass that the old label value still valid +is out of the allowed branch distance. AS knows of such situations +and suppresses all error messages about too long branches when it is +clear that another pass is needed. This works for 99\% of all cases, +but there are also constructs where the first critical instruction +appears so early that AS had no chance up to now to recognize that +another pass is needed. The following example constructs such a +situation with the help of a forward reference (and was the reason +for this section's heading...): +\begin{verbatim} + cpu 6811 + + org $8000 + beq skip + rept 60 + ldd Var + endm +skip: nop + +Var equ $10 +\end{verbatim} +Due to the address position, AS assumes long addresses in the first +pass for the \tty{LDD} instructions, which results in a code length of 180 +bytes and an out of branch error message in the second pass (at the +point of the \tty{BEQ} instruction, the old value of \tty{skip} is still valid, +i.e. AS does not know at this point that the code is only 120 bytes +long in reality). The error can be avoided in three +different ways: +\begin{enumerate} +\item{Explicitly tell AS to use short addressing for the \tty{LDD} + instructions (\tty{ldd label $ +\end{verbatim} +generates a symbol with correct attributes. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BIT} +\ttindex{BIT} + +{\em valid for: MCS/(2)51, XA, 80C166, 75K0, ST9} + +\tty{BIT} serves to equate a single bit of a memory cell with a symbolic +name.
This instruction varies from target platform to target platform due +to the different ways in which processors handle bit manipulation and +addressing: + +The MCS/51 family has its own address space for bit operands. The function +of \tty{BIT} is therefore quite similar to \tty{SFR}, i.e. a simple integer +symbol with the specified value is generated and assigned to the +\tty{BDATA} segment. For all other processors, bit addressing is done in +a two-dimensional fashion with address and bit position. In these cases, +AS packs both parts into an integer symbol in a way that depends on the +currently active target processor and separates both parts again when the +symbol is used. The latter is also valid for the 80C251: While an +instruction like +\begin{verbatim} +My_Carry bit PSW.7 +\end{verbatim} +would assign the value 0d7h to \tty{My\_Carry} on an 8051, a value of +070000d0h would be generated on an 80C251, i.e. the address is located in +bits 0..7 and the bit position in bits 24..26. This procedure is equal to +the way the \tty{DBIT} instruction handles things on a TMS370 and is also +used on the 80C166, with the only difference that bit positions may range +from 0..15: +\begin{verbatim} +MSB BIT r5.15 +\end{verbatim} +On a Philips XA, the bit's address is located in bits 0..9 just with +the same coding as used in machine instructions, and the 64K bank of +bits in RAM memory is placed in bits 16..23. + +The \tty{BIT} instruction of the 75K0 family even goes further: As bit +expressions may not only use absolute base addresses, even expressions +like +\begin{verbatim} +bit1 BIT @h+5.2 +\end{verbatim} +are allowed. + +The ST9 in turn allows to invert bits, what is also allowed in the +\tty{BIT} instruction: +\begin{verbatim} +invbit BIT r6.!3 +\end{verbatim} +More about the ST9's \tty{BIT} instruction can be found in the processor +specific hints.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DBIT} +\ttindex{DBIT} + +{\em valid for: TMS 370xxx} + +Though the TMS370 series does not have an explicit bit segment, single bit +symbols may be simulated with this instruction. \tty{DBIT} requires two +operands, the address of the memory cell that contains the bit and the +exact position of the bit in the byte. For example, +\begin{verbatim} +INT3 EQU P019 +INT3_ENABLE DBIT 0,INT3 +\end{verbatim} +defines the bit that enables interrupts via the INT3 pin. Bits defined +this way may be used in the instructions \tty{SBIT0, SBIT1, CMPBIT, +JBIT0}, and \tty{JBIT1}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PORT} +\ttindex{PORT} + +{\em valid for: 8080/8085/8086, XA, Z80, 320xx, TLCS-47, AVR} + +\tty{PORT} works similarly to \tty{EQU}, just the symbol becomes assigned to the +I/O-address range. Allowed values are 0..7 at the 3201x, 0..15 at the +320C2x, 0..65535 at the 8086, 0..63 at the AVR, and 0..255 at the rest. + +Example : an 8255 PIO is located at address 20H: +\begin{verbatim} +PIO_port_A port 20h +PIO_port_B port PIO_port_A+1 +PIO_port_C port PIO_port_A+2 +PIO_ctrl port PIO_port_A+3 +\end{verbatim} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{REG} +\ttindex{REG} + +{\em valid for: AVR, M*Core, ST9, 80C16x} + +Though it always has the same syntax, this instruction has a slightly +different meaning from processor to processor: If the processor uses a +separate addressing space for registers, \tty{REG} has the same effect as +a simple \tty{EQU} for this address space (e.g. for the ST9). \tty{REG} +defines register symbols for all other processors whose function is +described in section \ref{SectRegSyms}.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{LIV and RIV} +\ttindex{LIV}\ttindex{RIV} + +{\em valid for: 8X30x} + +\tty{LIV} and \tty{RIV} allow to define so-called ''IV bus objects''. +These are +groups of bits located in a peripheral memory cell with a length of 1 +up to 8 bits, which can afterwards be referenced symbolically. The +result is that one does not anymore have to specify address, +position, and length separately for instructions that can refer to +peripheral bit groups. As the 8X30x processors feature two +peripheral address spaces (a ''left'' and a ''right'' one), there are two +separate pseudo instructions. The parameters of these instructions +are however equal: three parameters have to be given that specify +address, start position and length. Further hints for the usage of +bus objects can be found in section \ref{8X30xSpec} . + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{CHARSET} +\ttindex{CHARSET} + +{\em valid for: all processors} + +Single board systems, especially when driving LCDs, frequently use +character sets different to ASCII. So it is probably purely coincidental +that the umlaut coding corresponds with the one used by the PC. To avoid +error-prone manual encoding, the assembler contains a translation table +for characters which assigns a target character to each source-code. To +modify this table (which initially translates 1:1), one has to use the +\tty{CHARSET} instruction. \tty{CHARSET} may be used with different +numbers and types of parameters. If there is only a single parameter, it +has to be a string expression which is interpreted as a file name by AS. +AS reads the first 256 bytes from this table and copies them into the +translation table. This allows to activate complex, externally generated +tables with a single statement.
For all other variants, the first +parameter has to be an integer in the range of 0 to 255 which designates +the start index of the entries to be modified in the translation table. +One or two parameters follow, giving the type of modification: + +A single additional integer modifies exactly one entry. For example, +\begin{quote}{\tt + CHARSET '\"a',128 +}\end{quote} +means that the target system codes the '\"a' into the number 128 +(80H). If however two more integers are given, the first one describes +the last entry to be modified, and the second the new value of the first +table entry. All entries up to the index end are loaded sequentially. +For example, in case that the target system does not support lower-case +characters, a simple +\begin{verbatim} + CHARSET 'a','z','A' +\end{verbatim} +translates all lower-case characters automatically into the +matching capital letters. + +For the last variant, a string follows the start index and contains the +characters to be placed in the table. The last example therefore may also +be written as +\begin{verbatim} + CHARSET 'a',"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +\end{verbatim} + +\tty{CHARSET} may also be called without any parameters, which however has +a drastic effect: the translation table is reinitialized to its initial +state, i.e. all character translations are removed. + +\bb{CAUTION!} \tty{CHARSET} not only affects string constants stored in +memory, but also integer constants written as ''ASCII''. This means that +an already modified translation table can lead to other results in the +above mentioned examples!
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{CODEPAGE} +\ttindex{CODEPAGE} + +{\em valid for: all processors} + +Though the \tty{CHARSET} statement gives unlimited freedom in the +character assignment between host and target platform, switching among +different character {\em sets} can become quite tedious if several +character sets have to be supported on the target platform. The +\tty{CODEPAGE} instruction however allows to define and keep different +character sets and to switch with a single statement among them. +\tty{CODEPAGE} expects one or two arguments: the name of the set to be +used hereafter and optionally the name of another table that defines its +initial contents (the second parameter therefore only has a meaning for +the first switch to the table when AS automatically creates it). If the +second parameter is missing, the initial contents of the new table are +copied from the previously active set. All subsequent \tty{CHARSET} +statements {\em only} modify the new set. + +At the beginning of a pass, AS automatically creates a single character +set with the name \tty{STANDARD} with a one-to-one translation. If no +\tty{CODEPAGE} instructions are used, all settings made via \tty{CHARSET} +refer to this table. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ENUM} +\ttindex{ENUM} + +{\em valid for: all processors} + +Similar to the same-named instruction known from C, \tty{ENUM} is used to +define enumeration types, i.e. a sequence of integer constants that +are assigned sequential values starting at 0. The parameters are the +names of the symbols, like in the following example: +\begin{verbatim} + ENUM SymA,SymB,SymC +\end{verbatim} +This instruction will assign the values 0, 1, and 2 to the symbols +\tty{SymA, SymB,} and \tty{SymC}. + +\tty{ENUM} instructions are always single-line instructions, i.e. 
the +enumeration will again start at zero when a new \tty{ENUM} instruction is +found. Multi-line enumerations may however be achieved with a small trick +that exploits the fact that the internal counter can be set to a new value +with an explicit assignment, like in the following case: +\begin{verbatim} + ENUM January=1,February,March,April,May,June +\end{verbatim} +The numeric values 1..6 are assigned to month names. One can +continue the enumeration in the following way: +\begin{verbatim} + ENUM July=June+1,August,September,October + ENUM November=October+1,December +\end{verbatim} +A definition of a symbol with \tty{ENUM} is equal to a definition with +\tty{EQU}, i.e. it is not possible to assign a new value to a symbol that +already exists. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{STRUCT and ENDSTRUCT} +\ttindex{STRUCT}\ttindex{ENDSTRUCT} + +{\em valid for: all processors} + +Even for assembler programs, there is from time to time the need to +define complex data structures similar to high-level languages. AS +supports this via the instructions \tty{STRUCT} and \tty{ENDSTRUCT} that begin +resp. finish the definition of such a structure. The operation is +simple: Upon occurrence of a \tty{STRUCT}, the current value of the program +counter is saved and the PC is reset to zero. By doing so, all +labels placed obtain the offset values of the structure's members. +The reservation of space for the individual fields is done with the +instructions used on the currently active processor to reserve memory +space, e.g. \tty{DS.x} for Motorolas and \tty{DB} \& co. for Intels. The label +prepended to \tty{STRUCT} (not optional) is the record's name and may +optionally be repeated for the \tty{ENDSTRUCT} statement. \tty{ENDSTRUCT} +furthermore places the record's total length in the symbol +\tty{$<$Name\_len$>$} +(one may force the usage of another symbol by giving its name as an +argument to \tty{ENDSTRUCT}). 
For example, in the definition +\begin{verbatim} +Rec STRUCT +Ident db ? +Pad db ? +Pointer dd ? +Rec ENDSTRUCT +\end{verbatim} +the symbol \tty{Rec\_len} would obtain the value 6. \bb{CAUTION!} Inside +of a structure definition, no instructions may be used that generate code, +as this is a pure placement of elements in the address space! + +\tty{STRUCT} definitions may be nested; after the inner \tty{STRUCT} definition +has been ended, the address counter of the outer structure will be +automatically incremented by the inner structure's size (the counting +inside the inner structure of course starts at zero). + +To avoid ambiguities when fields in different structures have same +names, AS by default prepends the structure's name to the field names, +separated by an underscore. For the example listed above, the symbols +\tty{Rec\_Ident, Rec\_Pad,} and \tty{Rec\_Pointer} would be created. This +behaviour can be suppressed by giving \tty{NOEXTNAMES} as a parameter to +the \tty{STRUCT} statement. This works in the same sense for nested +structure definitions, i.e. field names are extended by the names of all +structures that did not obtain a \tty{NOEXTNAMES} directive. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PUSHV and POPV} +\ttindex{PUSHV}\ttindex{POPV} + +{\em valid for: all processors} + +\tty{PUSHV} and \tty{POPV} allow to temporarily save the value of a symbol +(that is not macro-local) and to restore it at a later point of time. The +storage is done on stacks, i.e. Last-In-First-Out memory structures. A +stack has a name that has to fulfill the general rules for symbol names +and it exists as long as it contains at least one element: a stack that +did not exist before is automatically created upon \tty{PUSHV}, and a +stack becoming empty upon a \tty{POPV} is deleted automatically. The name +of the stack that shall be used to save or restore symbols is the first +parameter of \tty{PUSHV} resp.
\tty{POPV}, followed by a list of symbols as +further parameters. All symbols referenced in the list already have to +exist, it is therefore \bb{not} possible to implicitly define symbols with +a \tty{POPV} instruction. + +Stacks are a global resource, i.e. their names are not local to +sections. + +It is important to note that symbol lists are \bb{always} processed from +left to right. Someone who wants to pop several variables from a stack +with a \tty{POPV} therefore has to use the exact reverse order used in the +corresponding \tty{PUSHV}! + +The name of the stack may be left blank, like this: +\begin{verbatim} + pushv ,var1,var2,var3 + . + . + popv ,var3,var2,var1 +\end{verbatim} +AS will then use a predefined internal default stack. + +AS checks at the end of a pass if there are stacks that are not empty and +issues their names together with their ''filling level''. This allows to +find out if there are any unpaired \tty{PUSHVs} or \tty{POPVs}. However, +it is in no case possible to save values in a stack beyond the end of a +pass: all stacks are cleared at the beginning of a pass! + +%%--------------------------------------------------------------------------- + +\section{Code Modification} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ORG} +\label{SectORG} +\ttindex{ORG} + +{\em valid for: all processors} + +\tty{ORG} allows to load the internal address counter (of the assembler) +with a new value. The value range depends on the currently selected +segment and on the processor type (tables \ref{TabORG1} to \ref{TabORG4}). 
+The lower bound is always zero, and the upper bound is the given value +minus 1: +\small +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|} +\hline +\tin{processor} & \tin{CODE} & \tin{DATA} & \tin{IDATA} & \tin{XDATA} & \tin{YDATA} & \tin{BITDATA} & \tin{IO} & \tin{REG} & \tin{ROMDATA} \\ +\hline +\hline +\input{../doc_DE/taborg1.tex} +\hline +\end{tabular}\end{center} +\caption{Address Ranges for \tty{ORG} --- Part 1\label{TabORG1}} +\end{table*} +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|} +\hline +\tin{processor} & \tin{CODE} & \tin{DATA} & \tin{IDATA} & \tin{XDATA} & \tin{YDATA} & \tin{BITDATA} & \tin{IO} & \tin{REG} & \tin{ROMDATA} \\ +\hline +\hline +\input{../doc_DE/taborg2.tex} +\hline +\multicolumn{10}{|l|}{*) As the 8051 does not have any RAM beyond 80h, this value has to be} \\ +\multicolumn{10}{|l|}{ adapted with ORG for the 8051 as target processor!!}\\ +\hline +\multicolumn{10}{|l|}{+) As the Z180 still can address only 64K logically, the whole}\\ +\multicolumn{10}{|l|}{ address space can only be reached via \tty{PHASE} instructions!}\\ +\hline +\end{tabular}\end{center} +\caption{Address Ranges for \tty{ORG} --- Part 2\label{TabORG2}} +\end{table*} +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|} +\hline +\tin{processor} & \tin{CODE} & \tin{DATA} & \tin{IDATA} & \tin{XDATA} & \tin{YDATA} & \tin{BITDATA} & \tin{IO} & \tin{REG} & \tin{ROMDATA} \\ +\hline +\hline +\input{../doc_DE/taborg3.tex} +\hline +\end{tabular}\end{center} +\caption{Address Ranges for \tty{ORG} --- Part 3\label{TabORG3}} +\end{table*} +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|} +\hline +\tin{processor} & \tin{CODE} & \tin{DATA} & \tin{IDATA} & \tin{XDATA} & \tin{YDATA} & \tin{BITDATA} & \tin{IO} & \tin{REG} & \tin{ROMDATA} \\ +\hline +\hline +\input{../doc_DE/taborg4.tex} +\hline +\end{tabular}\end{center} +\caption{Address Ranges for \tty{ORG} --- Part 
4\label{TabORG4}} +\end{table*} +\normalsize + +In case that different variations in a processor family have address +spaces of different size, the maximum range is listed for each. + +\tty{ORG} is mostly needed to give the code a new starting address or to +put different, non-continuous code parts into one source file. In case +there is no other value explicitly listed in a table entry, the initial +address for this segment (i.e. the start address used without {\tt ORG}) +is 0. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{CPU} +\ttindex{CPU} + +{\em valid for: all processors} + +This command determines for which processor the further code shall be +generated. Instructions of other processor families are not +accessible afterwards and will produce error messages! + +The processors can roughly be distinguished in families, inside the +families different types additionally serve for a detailed +distinction: +%%----------- +\begin{quote} +\begin{tabbing} +\hspace{0.7cm} \= \kill +a) \> 68008 $\rightarrow$ 68000 $\rightarrow$ 68010 $\rightarrow$ 68012 $\rightarrow$ \\ + \> MCF5200 $\rightarrow$ 68332 $\rightarrow$ 68340 $\rightarrow$ 68360 $\rightarrow$ \\ + \> 68020 $\rightarrow$ 68030 $\rightarrow$ 68040 +\end{tabbing} +\end{quote} +The differences in this family lie in additional instructions and +addressing modes (starting from the 68020). A small exception is the step +to the 68030 that misses two instructions: \tty{CALLM} and \tty{RTM}. The +three representatives of the 683xx family have the same processor core (a +slightly reduced 68020 CPU), however completely different peripherals. +MCF5200 represents the ColdFire family from Motorola, RISC processors +downwardly binary compatible to the 680x0. For the 68040, additional +control registers (reachable via \tty{MOVEC}) and instructions for control +of the on-chip MMU and caches were added.
+%%----------- +\begin{quote} +b) 56000 $\longrightarrow$ 56002 $\longrightarrow$ 56300 +\end{quote} +While the 56002 only adds instructions for incrementing and decrementing +the accumulators, the 56300 core is almost a new processor: all address +spaces are enlarged from 64K words to 16M and the number of instructions +has almost been doubled. +%%----------- +\begin{quote} +c) PPC403 $\rightarrow$ MPPC403 $\rightarrow$ MPC505 $\rightarrow$ MPC601 $\rightarrow$ RS6000 +\end{quote} +The PPC403 is a reduced version of the PowerPC line without a floating +point unit, which is why all floating point instructions are disabled for +it; in turn, some microcontroller-specific instructions have been added +which are unique in this family. The GC variant of the PPC403 +incorporates an additional MMU and has therefore some additional +instructions for its control. The MPC505 (a microcontroller variant +without an FPU) only differs in its peripheral registers from the 601 as +far as I know - \cite{Mot505} is a bit reluctant in this +respect... The RS6000 line knows a few instructions more (that are +emulated on many 601-based systems), IBM additionally uses different +mnemonics for their pure workstation processors, as a reminiscence of 370 +mainframes... +%%----------- +\begin{quote} +d) MCORE +\end{quote} +%%----------- +\begin{quote} +e) 6800 $\rightarrow$ 6301 $\rightarrow$ 6811 +\end{quote} +While the 6301 only offers a few additional instructions, the 6811 +delivers a second index register and many more instructions. +%%----------- +\begin{quote} +f) 6809/6309 and 6805/68HC08 +\end{quote} +These processors are partially source-code compatible to the other +68xx processors, but they have a different binary code format and a +significantly reduced (6805) resp. enhanced (6809) instruction set.
+The 6309 is a CMOS version of the 6809 which is officially only +compatible to the 6809, but unofficially offers more registers and a +lot of new instructions (see \cite{Kaku}). +%%----------- +\begin{quote} +g) 68HC12 +\end{quote} +%%----------- +\begin{quote} +h) 68HC16 +\end{quote} +%%----------- +\begin{quote} +i) HD6413308 $\rightarrow$ HD6413309 +\end{quote} +These two names represent the 300 and 300H variants of the H8 +family; the H version owns a larger address space (16Mbytes instead +of 64Kbytes), double-width registers (32 bits), and knows a few more +instructions and addressing modes. It is still binary upward +compatible. +%%----------- +\begin{quote} +j) HD6475328 $\rightarrow$ HD6475348 $\rightarrow$ HD6475368 $\rightarrow$ HD6475388 +\end{quote} +These processors all share the same CPU core; the different types are +only needed to include the correct subset of registers in the file +\tty{REG53X.INC}. +%%----------- +\begin{quote} +k) SH7000 $\rightarrow$ SH7600 $\longrightarrow$ SH7700 +\end{quote} +The processor core of the 7600 offers a few more instructions that +close gaps in the 7000's instruction set (delayed conditional and +relative and indirect jumps, multiplications with 32-bit operands and +multiply/add instructions). The 7700 series (also known as SH3) +furthermore offers a second register bank, better shift instructions, and +instructions to control the cache. +%%----------- +\begin{quote} +l) 6502 $\rightarrow$ 65(S)C02 / MELPS740 / 6502UNDOC +\end{quote} +The CMOS version defines some additional instructions, as well as a number of +instruction/addressing mode combinations which were not +possible on the 6502. The Mitsubishi micro controllers in contrast extend +the 6502 instruction set primarily by bit operations and multiplication / +division instructions. Except for the unconditional jump and instructions +to increment/decrement the accumulator, the instruction extensions are +orthogonal.
The 65SC02 lacks the bit manipulation instructions of the +65C02. The 6502UNDOC processor type enables access to the ``undocumented'' +6502 instructions, i.e. the operations that result from the usage of bit +combinations in the opcode that are not defined as instructions. The +variants supported by AS are listed in the appendix containing processor-specific +hints. +%%----------- +\begin{quote} +m) MELPS7700, 65816 +\end{quote} +Apart from a '16-bit-version' of the 6502's instruction set, these +processors both offer some instruction set extensions. These are +however orthogonal as they are oriented along their 8-bit +predecessors (65C02 resp. MELPS-740). Partially, different +mnemonics are used for the same operations. +%%----------- +\begin{quote} +n) MELPS4500 +\end{quote} +%%----------- +\begin{quote} +o) M16 +\end{quote} +%%----------- +\begin{quote} +p) M16C +\end{quote} +%%----------- +\begin{quote} +q) 4004 +\end{quote} +%%----------- +\begin{quote} +r) 8021, 8022, 8039, 80C39, 8048, 80C48, 8041, 8042 +\end{quote} +For the ROM-less versions 8039 and 80C39, the commands which are +using the BUS (port 0) are forbidden. The 8021 and 8022 are special +versions with a strongly reduced instruction set, for which the 8022 +has two A/D converters and the necessary control-commands. It is +possible to transfer the CMOS-versions with the \tty{IDL}-command into a +stop mode with lower current consumption. The 8041 and 8042 have +some additional instructions for controlling the bus interface, but +in turn a few other commands were omitted. Moreover, the code +address space of these processors is not externally extendable, +and so AS limits the code segment of these processors to 1 resp. 2 +Kbytes.
+%%----------- +\begin{quote} +\begin{tabbing} +\hspace{0.7cm} \= \kill +s) \> 87C750 $\rightarrow$ 8051, 8052, 80C320, 80C501, 80C502, \\ + \> 80C504, 80515, and 80517 $\rightarrow$ 80C251 +\end{tabbing} +\end{quote} +The 87C750 can only access a maximum of 2 Kbytes program memory which is +why it lacks the \tty{LCALL} and \tty{LJMP} instructions. AS does not +make any distinction among the processors in the middle, instead it only +stores the different names in the \tty{MOMCPU} variable (see below), which +allows to query the setting with \tty{IF} instructions. An exception is +the 80C504 that has a mask flaw in its current versions. This flaw shows +up when an \tty{AJMP} or \tty{ACALL} instruction starts at the second last +address of a 2K page. AS will automatically use long instructions or +issue an error message in such situations. The 80C251 in contrast +represents drastic progress in the direction of 16/32 bits, larger +address spaces, and a more orthogonal instruction set. +%%----------- +\begin{quote} +t) 8096 $\rightarrow$ 80196 $\rightarrow$ 80196N $\rightarrow$ 80296 +\end{quote} +Apart from a different set of SFRs (which however strongly vary from +version to version), the 80196 knows several new instructions and +supports a 'windowing' mechanism to access the larger internal RAM. +The 80196N family extends the address space to 16 Mbytes and +introduces a set of instructions to access addresses beyond 64Kbytes. +The 80296 extends the CPU core by instructions for signal processing +and a second windowing register, however removes the Peripheral +Transaction Server (PTS) and therefore again loses two machine +instructions. +%%----------- +\begin{quote} +u) 8080 and 8085 +\end{quote} +The 8085 knows the additional commands \tty{RIM} and \tty{SIM} for +controlling the interrupt mask and the two I/O-pins.
+%%----------- +\begin{quote} +v) 8086 $\rightarrow$ 80186 $\rightarrow$ V30 $\rightarrow$ V35 +\end{quote} +Only new instructions are added in this family. The corresponding +8-bit versions are not mentioned due to their instruction +compatibility, so one e.g. has to choose 8086 for an 8088-based +system. +%%----------- +\begin{quote} +w) 80960 +\end{quote} +%%----------- +\begin{quote} +x) 8X300 $\rightarrow$ 8X305 +\end{quote} +The 8X305 features a couple of additional registers that are missing on the +8X300. Additionally, it can do new operations with these registers +(like direct writing of 8 bit values to peripheral addresses). +%%----------- +\begin{quote} +y) XAG1, XAG2, XAG3 +\end{quote} +These processors only differ in the size of their internal ROM which +is defined in \tty{STDDEFXA.INC}. +%%----------- +\begin{quote} +z) AT90S1200 $\rightarrow$ AT90S2313 $\rightarrow$ AT90S4414 $\rightarrow$ + AT90S8515 +\end{quote} +The first member of the AVR series represents a minimum configuration +without RAM memory and therefore lacks load/store instructions. The +other processors only differ in their memory equipment and +on-chip peripherals, which is differentiated in \tty{REGAVR.INC}. +%%----------- +\begin{quote} +aa) AM29245 $\rightarrow$ AM29243 $\rightarrow$ AM29240 $\rightarrow$ AM29000 +\end{quote} +The further one moves to the right in this list, the fewer the +instructions become that have to be emulated in software. While e.g. +the 29245 does not even have a hardware multiplier, the two representatives in +the middle only lack the floating point instructions. The 29000 +serves as a 'generic' type that understands all instructions in +hardware. +%%----------- +\begin{quote} +ab) 80C166 $\rightarrow$ 80C167,80C165,80C163 +\end{quote} +80C167 and 80C165/163 have an address space of 16 Mbytes instead of 256 +Kbytes, and furthermore they know some additional instructions for +extended addressing modes and atomic instruction sequences.
They are +'second generation' processors and differ from each other only in the +amount of on-chip peripherals. +%%----------- +\begin{quote} +ac) Z80 $\rightarrow$ Z80UNDOC $\rightarrow$ Z180 $\rightarrow$ Z380 +\end{quote} +While there are only a few additional instructions for the Z180, the +Z380 owns 32-bit registers, a linear address space of 4 Gbytes, a +couple of instruction set extensions that make the overall +instruction set considerably more orthogonal, and new addressing +modes (referring to index register halves, stack relative). These +extensions partially already exist on the Z80 as undocumented +extensions and may be switched on via the Z80UNDOC variant. A list +with the additional instructions can be found in the chapter +with processor specific hints. +%%----------- +\begin{quote} +ad) Z8601, Z8604, Z8608, Z8630, Z8631 +\end{quote} +These processors again only differ in internal memory size and +on-chip peripherals, i.e. the choice does not have an effect on the +supported instruction set. +%%----------- +\begin{quote} +ae) 96C141, 93C141 +\end{quote} +These two processors represent the two variations of the processor +family: TLCS-900 and TLCS-900L. The differences of these two variations +will be discussed in detail in section \ref{TLCS900Spec}. +%%----------- +\begin{quote} +af) 90C141 +\end{quote} +%%----------- +\begin{quote} +ag) 87C00, 87C20, 87C40, 87C70 +\end{quote} +The processors of the TLCS-870 series have an identical CPU core, but +different peripherals depending on the type. In part registers with +the same name are located at different addresses. The file +\tty{STDDEF87.INC} uses, similar to the MCS-51-family, the distinction +possible by different types to provide the correct symbol set +automatically. 
+%%----------- +\begin{quote} +ah) 47C00 $\rightarrow$ 470C00 $\rightarrow$ 470AC00 +\end{quote} +These three variations of the TLCS-47-family have on-chip RAM and ROM +of different size, which leads to several bank switching instructions +being added or suppressed. +%%----------- +\begin{quote} +ai) 97C241 +\end{quote} +%%----------- +\begin{quote} +aj) 16C54 $\rightarrow$ 16C55 $\rightarrow$ 16C56 $\rightarrow$ 16C57 +\end{quote} +These processors differ by the available code area, i.e. by the address +limit after which AS reports overruns. +%%----------- +\begin{quote} +ak) 16C84, 16C64 +\end{quote} +Analogous to the MCS-51 family, no distinction is made in the code generator, +the different numbers only serve to include the correct SFRs in +\tty{STDDEF18.INC}. +%%----------- +\begin{quote} +al) 17C42 +\end{quote} +%%----------- +\begin{quote} +am) ST6210/ST6215$\rightarrow$ST6220/ST6225 +\end{quote} +The only distinction AS makes between the two pairs is the smaller +addressing space (2K instead of 4K) of the first ones. The detailed +distinction serves to provide an automatic distinction in the source +file which hardware is available (analogous to the 8051/52/515). +%%----------- +\begin{quote} +an) ST7 +\end{quote} +%%----------- +\begin{quote} +ao) ST9020, ST9030, ST9040, ST9050 +\end{quote} +These 4 names represent the four ``sub-families'' of the ST9 family, +which only differ in their on-chip peripherals. Their processor +cores are identical, which is why this distinction is again only used +in the include file containing the peripheral addresses. +%%----------- +\begin{quote} +ap) 6804 +\end{quote} +%%----------- +\begin{quote} +aq) 32010$\rightarrow$32015 +\end{quote} +The TMS32010 owns just 144 bytes of internal RAM, and so AS limits +addresses in the data segment just up to this amount. This restriction +does not apply for the 32015, the full range from 0..255 can be used.
+%%----------- +\begin{quote} +ar) 320C25 $\rightarrow$ 320C26 $\rightarrow$ 320C28 +\end{quote} +These processors only differ slightly in their on-chip peripherals +and in their configuration instructions. +%%----------- +\begin{quote} +as) 320C30, 320C31 +\end{quote} +The 320C31 is a reduced version with the same instruction set, +however fewer peripherals. The distinction is exploited in +\tty{STDDEF3X.INC}. +%%----------- +\begin{quote} +at) 320C203 $\rightarrow$ 320C50, 320C51, 320C53 +\end{quote} +The first one represents the C20x family of signal processors which +implement a subset of the C5x instruction set. The distinction among the +C5x processors is currently not used by AS. +%%----------- +\begin{quote} +au) TMS9900 +\end{quote} +%%----------- +\begin{quote} +\begin{tabbing} +\hspace{0.7cm} \= \kill +av) \> TMS70C00, TMS70C20, TMS70C40,\\ + \> TMS70CT20, TMS70CT40,\\ + \> TMS70C02, TMS70C42, TMS70C82,\\ + \> TMS70C08, TMS70C48\\ +\end{tabbing} +\end{quote} +All members of this family share the same CPU core, they therefore do not +differ in their instruction set. The differences manifest only in the +file \tty{REG7000.INC} where address ranges and peripheral addresses are +defined. Types listed in the same row have the same amount of internal +RAM and the same on-chip peripherals, they differ only in the amount of +integrated ROM. +%%----------- +\begin{quote} +aw) 370C010, 370C020, 370C030, 370C040 and 370C050 +\end{quote} +Similar to the MCS-51 family, the different types are only used to +differentiate the peripheral equipment in \tty{STDDEF37.INC}; the +instruction set is always the same. +%%----------- +\begin{quote} +ax) MSP430 +\end{quote} +%%----------- +\begin{quote} +ay) SC/MP +\end{quote} +%%----------- +\begin{quote} +az) COP87L84 +\end{quote} +This is the only member of National Semiconductor's COP8 family that +is currently supported. 
I know that the family is substantially +larger and that there are representatives with instruction sets of different +size which will be added when a need occurs. It is a +beginning, and National's documentation is quite extensive... +%%----------- +\begin{quote} +\begin{tabbing} +\hspace{0.7cm} \= \kill +ba) \> SC14400, SC14401, SC14402, SC14404, SC14405, \\ + \> SC14420, SC14421, SC14422, SC14424 \\ +\end{tabbing} +\end{quote} +This series of DECT controllers differentiates itself by the number of +instructions, since each of them supports different B field formats and +their architecture has been optimized over time. +%%----------- +\begin{quote} +bb) 7810$\rightarrow$78C10 +\end{quote} +The NMOS version has no stop-mode; the respective command and the ZCM +register are omitted. \bb{CAUTION!} NMOS and CMOS versions partially +differ in the reset values of some registers! +%%----------- +\begin{quote} +\begin{tabbing} +\hspace{0.7cm} \= \kill +bc) \> 75402,\\ + \> 75004, 75006, 75008,\\ + \> 75268,\\ + \> 75304, 75306, 75308, 75312, 75316,\\ + \> 75328,\\ + \> 75104, 75106, 75108, 75112, 75116,\\ + \> 75206, 75208, 75212, 75216,\\ + \> 75512, 75516\\ +\end{tabbing} +\end{quote} +This 'cornucopia' of processors differs only by the RAM size in one +group; the groups themselves again differ by their on-chip +peripherals on the one hand and by their instruction set's power on +the other hand. +%%----------- +\begin{quote} +bd) 78070 +\end{quote} +This is currently the only member of NEC's 78K0 family I am familiar +with. Remarks similar to those for the COP8 family apply! +%%----------- +\begin{quote} +be) 7720 $\rightarrow$ 7725 +\end{quote} +The $\mu$PD7725 offers larger address spaces and some more instructions +compared to its predecessor. {\bf CAUTION!} The processors are not binary +compatible to each other!
+%%----------- +\begin{quote} +bf) 77230 +\end{quote} +%%----------- +\begin{quote} +\begin{tabbing} +bg) \= SYM53C810, SYM53C860, SYM53C815, SYM53C825, \\ + \> SYM53C875, SYM53C895 +\end{tabbing} +\end{quote} +The simpler members of this family of SCSI processors lack some +instruction variants, furthermore they are different in their set of +internal registers. +%%----------- +\begin{quote} +bh) MB89190 +\end{quote} +This processor type represents Fujitsu's F$^{2}$MC8L series. + +The \tty{CPU} instruction needs the processor type as a simple constant, a +calculation like: +\begin{verbatim} + CPU 68010+10 +\end{verbatim} +is not allowed. Valid calls are e.g. +\begin{verbatim} + CPU 8051 +\end{verbatim} +or +\begin{verbatim} + CPU 6800 +\end{verbatim} +Regardless of the processor type currently set, the integer variable +\tty{MOMCPU} contains the current status as a hexadecimal number. For +example, \tty{MOMCPU}=\$68010 for the 68010 or \tty{MOMCPU}=80C48H for the +80C48. As one cannot express all letters as hexadecimal digits (only A..F +are possible), all other letters must be omitted in the hex notation; +for example, \tty{MOMCPU}=80H for the Z80. + +You can take advantage of this feature to generate different code +depending on the processor type. For example, the 68000 does not have a +machine instruction for a subroutine return with stack correction. With +the variable \tty{MOMCPU} you can define a macro that uses the machine +instruction or emulates it depending on the processor type: +\begin{verbatim} +myrtd macro disp + if MOMCPU<$68010 ; emulate for 68008 & 68000 + move.l (sp),disp(sp) + lea disp(sp),sp + rts + elseif + rtd #disp ; direct use on >=68010 + endif + endm + + + cpu 68010 + myrtd 12 ; results in RTD #12 + + cpu 68000 + myrtd 12 ; results in MOVE../LEA../RTS +\end{verbatim} +As not all processor names are built only out of numbers and letters +from A..F, the full name is additionally stored in the string +variable named \tty{MOMCPUNAME}.
+ +The assembler implicitly switches back to the \tty{CODE} segment when a +\tty{CPU} instruction is executed. This is done because \tty{CODE} is the +only segment all processors support. + +The default processor type is 68008, unless it has been changed via the +command line option with the same name. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SUPMODE, FPU, PMMU} +\ttindex{SUPMODE}\ttindex{FPU}\ttindex{PMMU} + +{\em\begin{tabbing} + valid for: \= 680x0, FPU also for 80x86, i960, SUPMODE also for \\ + \> TLCS-900, SH7000, i960, 29K, XA, PowerPC, M*Core, \\ + \> and TMS9900 +\end{tabbing}} + +These three switches allow to define which parts of the instruction set +shall be disabled because the necessary preconditions are not valid for +the following piece of code. The parameter for these instructions may be +either \tty{ON} or \tty{OFF}, the current status can be read out of a +variable which is either TRUE or FALSE. + +The commands have the following meanings in detail: +\begin{itemize} +\item{\tty{SUPMODE}: allows or prohibits commands, for whose execution the + processor has to be within the supervisor mode. The status + variable is called \tty{INSUPMODE}.} +\item{\tty{FPU}: allows or prohibits the commands of the numerical + coprocessors 8087 resp. 68881 or 68882. The status variable + is called \tty{FPUAVAIL}.} +\item{\tty{PMMU}: allows or prohibits the commands of the memory + management unit 68851 resp. of the built-in MMU of the 68030. + \bb{CAUTION!} The 68030-MMU supports only a relatively small subset + of the 68851 instructions. The assembler cannot test this! + The status variable is called \tty{PMMUAVAIL}.} +\end{itemize} +The usage of instructions prohibited in this manner will generate a +warning at \tty{SUPMODE}, at \tty{PMMU} and \tty{FPU} a real error +message.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FULLPMMU} +\ttindex{FULLPMMU} + +{\em valid for: 680x0} + +Motorola integrated the MMU into the processor starting with the 68030, but +the built-in MMU is equipped only with a relatively small subset of the +68851 instruction set. AS will therefore disable all extended MMU +instructions when the target processor is 68030 or higher. It is however +possible that the internal MMU has been disabled in a 68030-based system +and the processor operates with an external 68851. One can then use a +\tty{FULLPMMU ON} to tell AS that the complete MMU instruction set is +allowed. Vice versa, one may use a \tty{FULLPMMU OFF} to disable all +additional instructions in spite of a 68020 target platform to ensure that +portable code is written. The switch between full and reduced instruction +set may be done as often as needed, and the current setting may be read +from a symbol with the same name. \bb{CAUTION!} The \tty{CPU} instruction +implicitly sets or resets this switch when its argument is a 68xxx +processor! \tty{FULLPMMU} therefore has to be written after the \tty{CPU} +instruction! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PADDING} +\ttindex{PADDING} + +{\em valid for: 680x0, M*Core, XA, H8, SH7000, MSP430, TMS9900, ST7} + +Processors of the 680x0 family are quite critical regarding odd addresses: +instructions must not start on an odd address, and data accesses to odd +addresses are only allowed bytewise up to the 68010. The H8/300 family +simply resets the lowest address bit to zero when accessing odd addresses, +the 500 in contrast 'thanks' with an exception... AS therefore tries to +round up data structures built with \tty{DC} or \tty{DS} to an even number +of bytes. This however means for \tty{DC.B} and \tty{DS.B} that a padding +byte may have to be added.
This behaviour can be turned on and off via +the \tty{PADDING} instruction. Similar to the previous instructions, the +argument may be either \tty{ON} or \tty{OFF}, and the current setting may +be read from a symbol with the same name. \tty{PADDING} is by default only +enabled for the 680x0 family, it has to be turned on explicitly for all +other families! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{MAXMODE} +\ttindex{MAXMODE} + +{\em valid for: TLCS-900, H8} + +The processors of the TLCS-900-family are able to work in 2 modes, the +minimum and maximum mode. Depending on the current mode, the execution +environment and the assembler are a little bit different. With this +instruction and the parameter \tty{ON} or \tty{OFF}, AS is informed that the +following code will run in maximum resp. minimum mode. The current setting +can be read from the variable \tty{INMAXMODE}. Presetting is \tty{OFF}, +i.e. minimum mode. + +Similarly, one uses this instruction to tell AS in H8 mode whether the +address space is 64K or 16 Mbytes. This setting is always \tty{OFF} for +the 'small' 300 version and cannot be changed. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{EXTMODE and LWORDMODE} +\ttindex{EXTMODE}\ttindex{LWORDMODE} + +{\em valid for: Z380} + +The Z380 may operate in altogether 4 modes, which are the result of +setting two flags: The XM flag determines whether the processor shall operate +with an address space of 64 Kbytes or 4 Gbytes and it may only be set to 1 +(after a reset, it is set to 0 for compatibility with the Z80). The LW +flag in turn determines whether word operations shall work with a word size of +16 or 32 bits. The setting of these two flags influences range checks of +constants and addresses, which is why one has to tell AS the setting of +these two flags via these instructions.
The default assumption is that +both flags are 0, the current setting (\tty{ON} or \tty{OFF}) may be read +from the predefined symbols \tty{INEXTMODE} resp. \tty{INLWORDMODE}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SRCMODE} +\ttindex{SRCMODE} + +{\em valid for: MCS-251} + +Intel substantially extended the 8051 instruction set with the 80C251, but +unfortunately there was only a single free opcode for all these new +instructions. To avoid a processor that will be eternally crippled by a +prefix, Intel provided two operating modes: the binary and the source +mode. The new processor is fully binary compatible to the 8051 in binary +mode, all new instructions require the free opcode as prefix. In source +mode, the new instructions exchange their places in the code tables with +the corresponding 8051 instructions, which in turn then need a prefix. +One has to inform AS whether the processor operates in source mode +(\tty{ON}) or binary mode (\tty{OFF}) to enable AS to add prefixes when +required. The current setting may be read from the variable +\tty{INSRCMODE}. The default is \tty{OFF}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BIGENDIAN} +\ttindex{BIGENDIAN} + +{\em valid for: MCS-51/251, PowerPC} + +Intel broke with its own principles when the 8051 series was designed: in +contrast to all traditions, the processor uses big-endian ordering for all +multi-byte values! While this was not a big deal for MCS-51 processors +(the processor could access memory only in 8-bit portions, so everyone was +free to use whichever endianness one wanted), it may be a problem for the +251 as it can fetch whole (long-)words from memory and expects the MSB to +be first. As this is not the way earlier versions of +AS stored constants, one can use this instruction to toggle between big and +little endian mode for the instructions \tty{DB, DW, DD, DQ,} and +\tty{DT}.
\tty{BIGENDIAN OFF} (the default) puts the LSB first into +memory as it used to be on earlier versions of AS, \tty{BIGENDIAN ON} +engages the big-endian mode compatible to the MCS-251. One may of course +change this setting as often as one wants; the current setting can be read +from the symbol with the same name. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{WRAPMODE} +\ttindex{WRAPMODE} + +{\em valid for: Atmel AVR} + +After this switch has been set to {\tt ON}, AS will assume that the +processor's program counter does not have the full length of 16 bits given +by the architecture, but instead a length that is exactly sufficient to +address the internal ROM. For example, in case of the AT90S8515, this +means 12 bits, corresponding to 4 Kwords or 8 Kbytes. This assumption +allows relative branches from the ROM's beginning to the end and vice +versa which would result in an out-of-branch error when using strict +arithmetics. Here, they work because the carry bits resulting from the +target address computation are discarded. Assure that the target +processor you are using works in the outlined way before you enable this +option! In case of the abovementioned AT90S8515, this option is even +necessary because it is the only way to perform a direct jump through +the complete address space... + +This switch is set to {\tt OFF} by default, and its current setting may be +read from a symbol with same name. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SEGMENT} +\ttindex{SEGMENT} + +{\em valid for: all processors} + +Some microcontrollers and signal processors know various address ranges, +which do not overlap with each other and require also different +instructions and addressing modes for access. To manage these ones also, +the assembler provides various program counters, you can switch among +them to and from by the use of the \tty{SEGMENT} instruction. 
For subroutines +included with \tty{INCLUDE}, this e.g. allows to define data used by the +main program or subroutines near to the place they are used. In detail, +the following segments with the following names are supported: +\begin{itemize} +\item{\tty{CODE}: program code;} +\item{\tty{DATA}: directly addressable data (including SFRs);} +\item{\tty{XDATA}: data in externally connected RAM or + X-addressing space of the DSP56xxx or ROM data for the $\mu$PD772x;} +\item{\tty{YDATA}: Y-addressing space of the DSP56xxx;} +\item{\tty{IDATA}: indirectly addressable (internal) data; } +\item{\tty{BITDATA}: the part of the 8051-internal RAM that is bitwise + addressable;} +\item{\tty{IO}: I/O-address range;} +\item{\tty{REG}: register bank of the ST9;} +\item{\tty{ROMDATA}: constant ROM of the NEC signal processors.} +\end{itemize} +See also section \ref{SectORG} (\tty{ORG}) for detailed information about +address ranges and initial values of the segments. Depending on the +processor family, not all segment types will be permitted. + +The bit segment is managed as if it would be a byte segment, i.e. the +addresses will be incremented by 1 per bit. + +Labels get the same type as attribute as the segment that was active +when the label was defined. So the assembler has a limited ability +to check whether you access symbols of a certain segment with wrong +instructions. In such cases the assembler issues a warning. + +Example: +\begin{verbatim} + CPU 8051 ; MCS-51-code + + segment code ; test code + + setb flag ; no warning + setb var ; warning : wrong segment + + segment data + +var db ? + + segment bitdata + +flag db ? +\end{verbatim} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PHASE and DEPHASE} +\ttindex{PHASE}\ttindex{DEPHASE} + +{\em valid for: all processors} + +For some applications (especially on Z80 systems), the code must be moved +to another address range before execution. 
If the assembler didn't know +about this, it would align all labels to the load address (not the start +address). The programmer is then forced to write jumps within this area +either independent of location or has to add the offset at each symbol +manually. The first one is not possible for some processors, the last one +is extremely error-prone. With the commands \tty{PHASE} and +\tty{DEPHASE}, it is possible to inform the assembler at which address the +code will really be executed on the target system: +\begin{verbatim} + phase <address>
+\end{verbatim} +informs the assembler that the following code shall be executed at the +specified address. The assembler calculates thereupon the difference to +the real program counter and adds this difference for the following +operations: +\begin{itemize} +\item{address values in the listing} +\item{filing of label values} +\item{program counter references in relative jumps and address expressions} +\item{readout of the program counter via the symbols * or \$} +\end{itemize} +This ''shifting'' is switched off by the instruction +\begin{verbatim} + dephase +\end{verbatim} +The assembler manages phase values for all defined segments, although +this instruction pair only makes real sense in the code segment. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SAVE and RESTORE} +\ttindex{SAVE}\ttindex{RESTORE} + +{\em valid for: all processors} + +The command \tty{SAVE} forces the assembler to push the contents of +following variables onto an internal stack: +\begin{itemize} +\item{currently selected processor type (set by \tty{CPU});} +\item{currently active memory area (set by \tty{SEGMENT});} +\item{the flag whether listing is switched on or off (set by \tty{LISTING});} +\item{the flag whether expansions of following macros shall be issued in + the assembly listing (set by \tty{MACEXP});} +\item{currently active character translation table (set by + \tty{CODEPAGE}).} +\end{itemize} +The counterpart \tty{RESTORE} pops the values saved last from this stack. +These two commands were primarily designed for include files, to change +the above mentioned variables in any way inside of these files, without +losing their original content. This may be helpful e.g. in include files +with own, fully debugged subroutines, to switch the listing generation +off: +\begin{verbatim} + SAVE ; save old status + + LISTING OFF ; save paper + + . ; the actual code + .
+ + RESTORE ; restore +\end{verbatim} +In contrast to a simple \tty{LISTING OFF .. ON}-pair, the correct status +will be restored, in case the listing generation was switched off already +before. + +The assembler checks if the number of \tty{SAVE}-and +\tty{RESTORE}-commands corresponds and issues error messages in the +following cases: +\begin{itemize} +\item{\tty{RESTORE}, but the internal stack is empty;} +\item{the stack is not empty at the end of a pass.} +\end{itemize} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ASSUME} +\ttindex{ASSUME} + +{\em valid for: various} + +This instruction allows to tell AS the current setting of certain +registers whose contents cannot be described with a simple \tty{ON} or +\tty{OFF}. These are typically registers that influence addressing modes +and whose contents are important to know for AS in order to generate +correct addressing. It is important to note that \tty{ASSUME} only +informs AS about these, \bb{no} machine code is generated that actually +loads these values into the appropriate registers! + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{6809} + +In contrast to its 'predecessors' like the 6800 and 6502, the position of +the direct page, i.e. the page of memory that can be reached with +single-byte addresses, can be set freely. This is done via the 'direct +page register' that sets the page number. One has to assign a +corresponding value to this register via \tty{ASSUME} if the contents are +different from the default of 0, otherwise wrong addresses will be +generated! + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{68HC16} + +The 68HC16 employs a set of bank registers to address a space of 1 +Mbyte with its registers that are only 16 bits wide. These registers +supply the upper 4 bits. Of these, the EK register is responsible +for absolute data accesses (not jumps!).
AS checks for each absolute +address whether the upper 4 bits of the address are equal to the +value of EK specified via \tty{ASSUME}. AS issues a warning if they +differ. The default for EK is 0. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{H8/500} + +In maximum mode, the extended address space of these processors is +addressed via a couple of bank registers. They carry the names DP +(registers from 0..3, absolute addresses), EP (register 4 and 5), and TP +(stack). AS needs the current value of DP to check if absolute addresses +are within the currently addressable bank; the other two registers are +only used for indirect addressing and can therefore not be monitored; it +is a question of personal taste whether one specifies their values or not. +The BR register is in contrast important because it rules which 256-byte +page may be accessed with short addresses. It is common for all registers +that AS does not assume \bb{any} default value for them as they are +undefined after a CPU reset. Everyone who wants to use absolute addresses +must therefore assign values to at least BR and DP! + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{MELPS740} + +Microcontrollers of this series know a ''special page'' addressing mode +for the \tty{JSR} instruction that allows a shorter coding for jumps into +the last page of on-chip ROM. The size of this ROM depends of course +on the exact processor type, and there are more derivatives than it +would be meaningful to offer via the CPU instruction...we therefore +have to rely on \tty{ASSUME} to define the address of this page, e.g. +\begin{verbatim} + ASSUME SP:$1f +\end{verbatim} +in case the internal ROM is 8K. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ +\subsubsection{MELPS7700/65816} + +These processors contain a lot of registers whose contents AS has to know +in order to generate correct machine code. These are the registers +in question: +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & function & value range & default \\ +\hline +\hline +DT & data bank & 0-\$ff & 0 \\ +PG & code Bank & 0-\$ff & 0 \\ +DPR & directly addr. page & 0-\$ffff & 0 \\ +X & index register width & 0 or 1 & 0 \\ +M & accumulator width & 0 or 1 & 0 \\ +\hline +\end{tabular}\end{center} +\par +To avoid endless repetitions, see section \ref{MELPS7700Spec} for +instructions how to use these registers. The handling is otherwise +similar to the 8086, i.e. multiple values may be set with one instruction +and no code is generated that actually loads the registers with the given +values. This is again up to the programmer! + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{MCS-196/296} + +Starting with the 80196, all processors of the MCS-96 family have a +register 'WSR' that allows to map memory areas from the extended +internal RAM or the SFR range into areas of the register file which +may then be accessed with short addresses. If one informs AS about +the value of the WSR register, it can automatically find out whether +an absolute address can be addressed with a single-byte address via +windowing; consequently, long addresses will be automatically generated +for registers covered by windowing. The 80296 contains an additional +register WSR1 to allow simultaneous mapping of two memory areas into +the register file. In case it is possible to address a memory cell +via both areas, AS will always choose the way via WSR! + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
+ +\subsubsection{8086} + +The 8086 is able to address data from all segments in all +instructions, but it however needs so-called ''segment prefixes'' if +another segment register than DS shall be used. In addition it is +possible that the DS register is adjusted to another segment, e.g. to +address data in the code segment for longer parts of the program. As +AS cannot analyze the code's meaning, it has to be informed via this +instruction to what segments the segment registers point at the +moment, e.g.: +\begin{verbatim} + ASSUME CS:CODE, DS:DATA . +\end{verbatim} +It is possible to assign assumptions to all four segment registers in +this way. This instruction produces \bb{no} code, so the program itself +has to do the actual load of the registers with the values. + +The usage of this instruction has on the one hand the result that AS is +able to automatically put ahead prefixes at sporadic accesses into the +code segment, or on the other hand, one can inform AS that the DS-register +was modified and you can save explicit \tty{CS:}-instructions. + +Valid arguments behind the colon are \tty{CODE}, \tty{DATA} and +\tty{NOTHING}. The latter value informs AS that a segment register +contains no usable value (for AS). The following values are +preinitialized: +\begin{verbatim} + CS:CODE, DS:DATA, ES:NOTHING, SS:NOTHING +\end{verbatim} + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{XA} + +The XA family has a data address space of 16 Mbytes, a process however +can always address within a 64K segment only that is given by the DS +register. One has to inform AS about the current value of this +register in order to enable it to check accesses to absolute +addresses. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{29K} + +The processors of the 29K family feature a register RBP that allows +to protect banks of 16 registers against access from user mode.
The +corresponding bit has to be set to achieve the protection. \tty{ASSUME} +allows to tell AS which value RBP currently contains. AS can warn +this way in case a try to access protected registers from user mode +is made. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{80C166/167} + +Though none of the 80C166/167's registers is longer than sixteen bits, +this processor has 18/24 address lines and can therefore address up +to 256Kbytes/16Mbytes. To resolve this contradiction, it neither +uses the well-known (and ill-famed) Intel method of segmentation nor +does it have inflexible bank registers...no, it uses paging! To accomplish +this, the logical address space of 64 Kbytes is split into 4 pages of +16 Kbytes, and for each page there is a page register (named +DPP0..DPP3) that rules which of the 16/1024 physical pages shall be +mapped to this logical page. AS always tries to present the address +space with a size of 256Kbytes/16MBytes in the sight of the +programmer, i.e. the physical page is taken for absolute accesses and +the setting of bits 14/15 of the logical address is deduced. If no +page register fits, a warning is issued. AS assumes by default that +the four registers linearly map the first 64 Kbytes of memory, in the +following style: +\begin{verbatim} + ASSUME DPP0:0,DPP1:1,DPP2:2,DPP3:3 +\end{verbatim} +The 80C167 knows some additional instructions that can override the +page registers' function. The chapter with processor-specific hints +describes how these instructions influence the address generation. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{TLCS-47} + +The direct data address space of these processors (it makes no +difference whether you address directly or via the HL register) has a +size of only 256 nibbles. 
Because the ''better'' family members have +up to 1024 nibbles of RAM on chip, Toshiba was forced to introduce a +banking mechanism via the DMB register. AS manages the data segment +as a continuous addressing space and checks at any direct addressing +if the address is in the currently active bank. The bank AS +currently expects can be set by means of +\begin{verbatim} + ASSUME DMB:<0..3> +\end{verbatim} +The default value is 0. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{ST6} +\label{ST6Assume} + +The microcontrollers of the ST62 family are able to map a part (64 bytes) +of the code area into the data area, e.g. to load constants from the ROM. +This means also that at one moment only one part of the ROM can be +addressed. A special register rules which part it is. AS cannot check +the contents of this register directly, but it can be informed by this +instruction that a new value has been assigned to the register. AS then +can test and warn if necessary, in case addresses of the code segment are +accessed, which are not located in the ''announced'' window. If, for +example, the variable \tty{VARI} has the value 456h, so +\begin{verbatim} + ASSUME ROMBASE:VARI>>6 +\end{verbatim} +sets the AS-internal variable to 11h, and an access to \tty{VARI} +generates an access to address 56h in the data segment. + +It is possible to assign a simple \tty{NOTHING} instead of a value, e.g. +if the bank register is used temporarily as a memory cell. This value is +also the default. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{ST9} + +The ST9 family uses exactly the same instructions to address code and +data area. It depends on the setting of the flag register's DP flag +which address space is referenced. 
To enable AS to check if one +works with symbols from the correct address space (this of course +\bb{only} works with absolute accesses!), one has to inform AS whether the +DP flag is currently 0 (code) or 1 (data). The initial value of this +assumption is 0. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{320C3x} + +As all instruction words of this processor family are only 32 bits +long (of which only 16 bits were reserved for absolute addresses), +the missing upper 8 bits have to be added from the DP register. It +is however still possible to specify a full 24-bit address when +addressing, AS will check then whether the upper 8 bits are equal to +the DP register's assumed values. \tty{ASSUME} is different to the +\tty{LDP} instruction in the sense that one cannot specify an arbitrary +address out of the bank in question, one has to extract the upper bits by +hand: +\begin{verbatim} + ldp @addr + assume dp:addr>>16 + . + . + ldi @addr,r2 +\end{verbatim} + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +\subsubsection{$\mu$PD78(C)10} + +These processors have a register (V) that allows to move the ''zero +page'', i.e. page of memory that is addressable by just one byte, +freely in the address space, within page limits. By reasons of +comforts you don't want to work with expressions such as +\begin{verbatim} + inrw Lo(counter) +\end{verbatim} +so AS takes over this job, but only under the premise that it is informed +via the \tty{ASSUME}-command about the contents of the V register. If an +instruction with short addressing is used, it will be checked if the upper +half of the address expression corresponds to the expected content. A +warning will be issued if both do not match. + +%%. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
+ +\subsubsection{75K0} + +As the whole address space of 12 bits could not be addressed even by +the help of register pairs (8 bits), NEC had to introduce banking +(like many others too...): the upper 4 address bits are fetched from +the MBS register (which can be assigned values from 0 to 15 by the +\tty{ASSUME} instruction), which however will only be regarded if the MBE +flag has been set to 1. If it is 0 (default), the lowest and highest +128 nibbles of the address space can be reached without banking. The +\tty{ASSUME} instruction is undefined for the 75402 as it contains neither +a MBE flag nor an MBS register; the initial values cannot be changed +therefore. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{EMULATED} + +{\em valid for: 29K} + +AMD defined the 29000's series exception handling for undefined +instructions in a way that there is a separate exception vector for +each instruction. This allows to extend the instruction set of a +smaller member of this family by a software emulation. To avoid that +AS quarrels about these instructions as being undefined, the +\tty{EMULATED} instruction allows to tell AS that certain instructions are +allowed in this case. The check if the currently set processor knows the +instruction is then skipped.
For example, if one has written a module +that supports 32-bit IEEE numbers and the processor does not have a FPU, +one writes +\begin{verbatim} + EMULATED FADD,FSUB,FMUL,FDIV + EMULATED FEQ,FGE,FGT,SQRT,CLASS +\end{verbatim} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BRANCHEXT} +\ttindex{BRANCHEXT} + +{\em valid for: XA} + +{\tt BRANCHEXT} with either \tty{ON} or \tty{OFF} as argument tells AS +whether short branches that are only available with an 8-bit displacement +shall automatically be 'extended', for example by replacing a single +instruction like +\begin{verbatim} + bne target +\end{verbatim} +with a longer sequence of same functionality, in case the branch target is +out of reach for the instruction's displacement. For example, the +replacement sequence for {\tt bne} would be +\begin{verbatim} + beq skip + jmp target +skip: +\end{verbatim} +In case there is no fitting 'opposite' for an instruction, the sequence +may become even longer, e.g. for {\tt jbc}: +\begin{verbatim} + jbc dobr + bra skip +dobr: jmp target +skip: +\end{verbatim} +This feature however has the side effect that there is no unambiguous +assignment between machine and assembly code any more. Furthermore, +additional passes may be the result if there are forward branches. One +should therefore use this feature with caution! + +%%--------------------------------------------------------------------------- + +\section{Data Definitions} + +The instructions described in this section partially overlap in their +functionality, but each processor family defines other names for the +same function. To stay compatible with the standard assemblers, this +way of implementation was chosen. + +If not explicitly mentioned otherwise, all instructions for data +deposition (not those for reservation of memory!) allow an arbitrary +number of parameters which are being processed from left to right.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DC[.Size]} +\ttindex{DC} + +{\em valid for: 680x0, M*Core, 68xx, H8, SH7x00, DSP56xxx, XA, ST7} + + +This instruction places one or several constants of the type +specified by the attribute into memory. The attributes are the same ones as +defined in section \ref{AttrTypes}, and there is additionally the +possibility for byte constants to place string constants in memory, like +\begin{verbatim} +String dc.B "Hello world!\0" +\end{verbatim} +The parameter count may be between 1 and 20. A repeat count enclosed +in brackets may additionally be prefixed to each parameter; for +example, one can for example fill the area up to the next page +boundary with zeroes with a statement like +\begin{verbatim} + dc.b [(*+255)&$ffffff00-*]0 +\end{verbatim} +\bb{CAUTION!} This function easily allows to reach the limit of 1 Kbyte +of generated code per line! + +The assembler can automatically add another byte of data in case the byte sum +should become odd, to keep the word alignment. This behaviour may be +turned on and off via the \tty{PADDING} instruction. + +Decimal floating point numbers stored with this instruction (\tty{DC.P...}) +can cover the whole range of extended precision, one however has to +pay attention to the detail that the coprocessors currently available +from Motorola (68881/68882) ignore the thousands digit of the +exponent at the read of such constants! + +The default attribute is \tty{W}, that means 16-bit-integer numbers. + +For the DSP56xxx, the data type is fixed to integer numbers (an attribute is +therefore neither necessary nor allowed), which may be in the range +of -8M up to 16M-1. String constants are also allowed, whereby three characters +are packed into each word. 
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DS[.Size]} +\ttindex{DS} + +{\em valid for: 680x0, M*Core, 68xx, H8, SH7x00, DSP56xxx, XA, ST7} + +On the one hand, this instruction enables to reserve memory space for +the specified count of numbers of the type given by the attribute. +Therefore, +\begin{verbatim} + DS.B 20 +\end{verbatim} +for example reserves 20 bytes of memory, but +\begin{verbatim} + DS.X 20 +\end{verbatim} +reserves 240 bytes! + +The other purpose is the alignment of the program counter which is +achieved by a count specification of 0. In this way, with a +\begin{verbatim} + DS.W 0 , +\end{verbatim} +the program counter will be rounded up to the next even address, with +a +\begin{verbatim} + DS.D 0 +\end{verbatim} +in contrast to the next double word boundary. Memory cells possibly +staying unused thereby are neither zeroed nor filled with NOPs, they +simply stay undefined. + +The default for the operand length is - as usual - \tty{W}, i.e. 16 bits. + +For the 56xxx, the operand length is fixed to words (of 24 bits), +attributes therefore do not exist just as in the case of \tty{DC}.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DB,DW,DD,DQ, and DT} +\ttindex{DB}\ttindex{DW}\ttindex{DD}\ttindex{DQ}\ttindex{DT} + +{\em\begin{tabbing} +valid for: \= Intel, Zilog, Toshiba, NEC, TMS370, Siemens, AMD, \\ + \> MELPS7700/65816, M16(C), National, ST9, TMS70Cxx, \\ + \> $\mu$PD77230, Fairchild +\end{tabbing}} + +These commands are - one could say - the Intel counterpart to \tty{DS} and +\tty{DC}, and as expected, their logic is a little bit different: First, +the specification of the operand length is moved into the mnemonic: +\begin{itemize} +\item{\tty{DB}: byte or ASCII string similar to \tty{DC.B}} +\item{\tty{DW}: 16-bit integer} +\item{\tty{DD}: 32-bit integer or single precision} +\item{\tty{DQ}: double precision (64 bits)} +\item{\tty{DT}: extended precision (80 bits)} +\end{itemize} +Second, the distinction between constant definition and memory +reservation is done by the operand. A reservation of memory is +marked by a \tty{?} : +\begin{verbatim} + db ? ; reserves a byte + dw ?,? ; reserves memory for 2 words (=4 byte) + dd -1 ; places the constant -1 (FFFFFFFFH) ! +\end{verbatim} +Reserved memory and constant definition \bb{must not} be mixed within one +instruction: +\begin{verbatim} + db "hello",? ; --> error message +\end{verbatim} +Additionally, the \tty{DUP} Operator permits the repeated placing of +constant sequences or the reservation of whole memory blocks: +\begin{verbatim} + db 3 dup (1,2) ; --> 1 2 1 2 1 2 + dw 20 dup (?) ; reserves 40 bytes of memory +\end{verbatim} +As you can see, the \tty{DUP}-argument must be enclosed in parentheses, +which is also why it may consist of several components, that may +themselves be \tty{DUP}s...the stuff therefore works recursively. +\tty{DUP} is however also a place where one can get in touch with another +limit of the assembler: a maximum of 1024 bytes of code or data may be +generated in one line. 
This is not valid for the reservation of memory, +only for the definition of constant arrays! + +In order to be compatible to the M80, \tty{DEFB/DEFW} may be used instead of +\tty{DB/DW} in Z80-mode. + +Similarly, \tty{BYTE/ADDR} resp. \tty{WORD/ADDRW} in COP8 mode are an +alias for \tty{DB} resp. \tty{DW}, with the pairs differing in byte order: +instructions defined by National for address storage use big endian, +\tty{BYTE} resp. \tty{WORD} in contrast use little endian. + +The NEC 77230 is special with its \tty{DW} instruction: It more works like +the \tty{DATA} statement of its smaller brothers, but apart from string +and integer arguments, it also accepts floating point values (and stores +them in the processor's proprietary 32-bit format). There is {\em no} +\tty{DUP} operator! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DS, DS8} +\ttindex{DS} +\ttindex{DS8} + +{\em\begin{tabbing} +valid for: \= Intel, Zilog, Toshiba, NEC, TMS370, Siemens, AMD, \\ + \> M16(C), National, ST9, TMS7000 +\end{tabbing}} + +With this instruction, you can reserve a memory area: +\begin{verbatim} + DS <count> +\end{verbatim} +It is an abbreviation of +\begin{verbatim} + DB <count> DUP (?) +\end{verbatim} +Although this could easily be made by a macro, some people grown up +with Motorola CPUs (Hi Michael!) suggest \tty{DS} to be a built-in +instruction...I hope they are satisfied now \tty{;-)} + +{\tt DS8} is defined as an alias for {\tt DS} on the National SC14xxx. +Beware that the code memory of these processors is organized in words of +16 bits, it is therefore impossible to reserve individual bytes. In case +the argument of {\tt DS} is odd, it will be rounded up to the next even +number.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BYT or FCB} +\ttindex{BYT}\ttindex{FCB} + +{\em valid for: 6502, 68xx} + +By this instruction, byte constants or ASCII strings are placed in +65xx/68xx-mode, it therefore corresponds to \tty{DC.B} on the 68000 or +\tty{DB} on Intel. Similarly to \tty{DC}, a repetition factor enclosed +in brackets ([..]) may be prepended to every single parameter. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BYTE} +\ttindex{BYTE} + +{\em valid for: ST6, 320C2(0)x, 320C5x, MSP, TMS9900} + +Ditto. Note that when in 320C2(0)x/5x mode, the assembler assumes that +a label on the left side of this instruction has no type, i.e. it +belongs to no address space. This behaviour is explained in the +processor-specific hints. + +The \tty{PADDING} instruction allows to set whether odd counts of bytes +shall be padded with a zero byte in MSP/TMS9900 mode. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DC8} +\ttindex{DC8} + +{\em valid for: SC144xx} + +This statement is an alias for {\tt DB}, i.e. it may be used to dump byte +constants or strings to memory. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ADR or FDB} +\ttindex{ADR}\ttindex{FDB} + +{\em valid for: 6502, 68xx} + +\tty{ADR} resp. \tty{FDB} stores word constants when in 65xx/68xx mode. +It is therefore the equivalent to \tty{DC.W} on the 68000 or \tty{DW} on +Intel platforms. Similarly to \tty{DC}, a repetition factor enclosed +in brackets ([..]) may be prepended to every single parameter. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{WORD} +\ttindex{WORD} + +{\em valid for: ST6, i960, 320C2(0)x, 320C3x, 320C5x, MSP} + +If assembling for the 320C3x or i960, this command stores 32-bit words, +16-bit words for the other families. 
Note that when in 320C2(0)x/5x mode, +the assembler assumes that a label on the left side of this instruction +has no type, i.e. it belongs to no address space. This behaviour is +explained at the discussion on processor-specific hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DW16} +\ttindex{DW16} + +{\em valid for: SC144xx} + +This instruction is for SC144xx targets a way to dump word (16 bit) +constants to memory. {\tt CAUTION!!} It is therefore an alias for {\tt +DW}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{LONG} +\ttindex{LONG} + +{\em valid for: 320C2(0)x, 320C5x} + +LONG stores a 32-bit integer to memory with the order LoWord-HiWord. +Note that when in 320C2(0)x/5x mode, the assembler assumes that a label +on the left side of this instruction has no type, i.e. it belongs to +no address space. This behaviour is explained in the +processor-specific hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SINGLE and EXTENDED} +\ttindex{SINGLE}\ttindex{EXTENDED} + +{\em valid for: 320C3x} + +Both commands store floating-point constants to memory. They are \bb{not} +in IEEE-format. Instead the processor-specific formats with 32 and 40 bit +are used. In case of \tty{EXTENDED} the resulting constant occupies two +memory words. The most significant 8 bits (the exponent) are written to +the first word while the other ones (the mantissa) are copied into the second +word. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FLOAT and DOUBLE} +\ttindex{FLOAT}\ttindex{DOUBLE} + +{\em valid for: 320C2(0)x, 320C5x} + +These two commands store floating-point constants in memory using the +standard IEEE 32-bit and 64-bit IEEE formats. The least significant +byte is copied to the first allocated memory location. 
Note that +when in 320C2(0)x/5x mode the assembler assumes that all labels on the +left side of an instruction have no type, i.e. they belong to no +address space. This behaviour is explained in the processor-specific +hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{EFLOAT, BFLOAT, and TFLOAT} +\ttindex{EFLOAT}\ttindex{BFLOAT}\ttindex{TFLOAT} + +{\em valid for: 320C2(0)x, 320C5x} + +Another three floating point commands. All of them support non-IEEE +formats, which should be easily applicable on signal processors: +\begin{itemize} +\item{\tty{EFLOAT}: mantissa with 16 bits, exponent with 16 bits} +\item{\tty{BFLOAT}: mantissa with 32 bits, exponent with 16 bits} +\item{\tty{TFLOAT}: mantissa with 64 bits, exponent with 32 bits} +\end{itemize} +The three commands share a common storage strategy. In all cases the +mantissa precedes the exponent in memory, both are stored as 2's +complement with the least significant byte first. Note that when in +320C2(0)x/5x mode the assembler assumes that all labels on the left side +of an instruction have no type, i.e. they belong to no address +space. This behaviour is explained in the processor-specific hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{Qxx and LQxx} +\ttindex{Qxx}\ttindex{LQxx} + +{\em valid for: 320C2(0)x, 320C5x} + +\tty{Qxx} and \tty{LQxx} can be used to generate constants in a fixed +point format. \tty{xx} denotes a 2-digit number. The operand is first +multiplied by $2^{xx}$ before converting it to binary notation. Thus +\tty{xx} can be viewed as the number of bits which should be reserved for +the fractional part of the constant in fixed point format.
\tty{Qxx} +stores only one word (16 bit) while \tty{LQxx} stores two words (low word +first): +\begin{verbatim} + q05 2.5 ; --> 0050h + lq20 ConstPI ; --> 43F7h 0032h +\end{verbatim} +Please do not flame me in case I calculated something wrong on my +HP28... + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DATA} +\ttindex{DATA} + +{\em valid for: PIC, 320xx, AVR, MELPS-4500, 4004, $\mu$PD772x} + +This command stores data in the current segment. Both integer values as +well as character strings are supported. On 16C5x/16C8x, 17C4x in data +segment and on the 4500, characters occupy one word. On AVR, 17C4x in +code segment, $\mu$PD772x in the data segments, and on 3201x/3202x, in +general two characters fit into one word (LSB first). The $\mu$PD77C25 +can hold three bytes per word in the code segment. When in 320C3x mode, +the assembler puts four characters into one word (MSB first). In contrast +to this characters occupy two memory locations in the data segment of the +4500, similar in the 4004. The range of integer values corresponds to the +word width of each processor in a specific segment. This means that +\tty{DATA} has the same result as \tty{WORD} on a 320C3x (and that of +\tty{SINGLE} if AS recognizes the operand as a floating-point constant). + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ZERO} +\ttindex{ZERO} + +{\em valid for: PIC} + +Generates a continuous string of zero words in memory. The length is +given by the argument and must not exceed 512. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FB and FW} +\ttindex{FB}\ttindex{FW} + +{\em valid for: COP8} + +These instructions allow one to fill memory blocks with a byte or word +constant. The first operand specifies the size of the memory block +while the second one sets the filling constant itself.
The maximum +supported block size is 1024 elements for \tty{FB} and 512 elements for +\tty{FW}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ASCII and ASCIZ} +\ttindex{ASCII}\ttindex{ASCIZ} + +{\em valid for: ST6} + +Both commands store string constants to memory. While \tty{ASCII} writes +the character information only, \tty{ASCIZ} additionally appends a zero to +the end of the string. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{STRING and RSTRING} +\ttindex{STRING}\ttindex{RSTRING} + +{\em valid for: 320C2(0)x, 320C5x} + +These commands are functionally equivalent to \tty{DATA}, but integer +values are limited to the range of byte values. This enables two +characters or numbers to be packed together into one word. Both commands +only differ in the order they use to write bytes: \tty{STRING} stores the +upper one first then the lower one, \tty{RSTRING} does this vice versa. +Note that when in 320C2(0)x/5x mode the assembler assumes that a label on the +left side of this instruction has no type, i.e. it belongs to no address +space. This behaviour is explained in the processor-specific hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FCC} +\ttindex{FCC} + +{\em valid for: 6502, 68xx} + +When in 65xx/68xx mode, string constants are generated using this +instruction. In contrast to the original assembler AS11 from Motorola +(this is the main reason why AS understands this command, the +functionality is contained within the \tty{BYT} instruction) you must +enclose the string argument by double quotation marks instead of single +quotation marks or slashes. Similarly to \tty{DC}, a repetition factor +enclosed in brackets ([..]) may be prepended to every single parameter. 
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DFS or RMB} +\ttindex{DFS}\ttindex{RMB} + +{\em valid for: 6502, 68xx} + +Reserves a memory block when in 6502/68xx mode. It is therefore the +equivalent to \tty{DS.B} on the 68000 or \tty{DB ?} on Intel platforms. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BLOCK} +\ttindex{BLOCK} + +{\em valid for: ST6} + +Ditto. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SPACE} +\ttindex{SPACE} + +{\em valid for: i960} + +Ditto. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{RES} +\ttindex{RES} + +{\em valid for: PIC, MELPS-4500, 3201x, 320C2(0)x, 320C5x, AVR, $\mu$PD772x} + +This command allocates memory. When used in code segments the +argument counts words (10/12/14/16 bit). In data segments it counts +bytes for PICs, nibbles for 4500's and words for the TI devices. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BSS} +\ttindex{BSS} + +{\em valid for: 320C2(0)x, 320C3x, 320C5x, MSP} + +\tty{BSS} works like \tty{RES}, but when in 320C2(0)x/5x mode, the assembler +assumes that a label on the left side of this instruction has no type, i.e. +it belongs to no address space. This behaviour is explained in the +processor-specific hints. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DSB and DSW} +\ttindex{DSB}\ttindex{DSW} + +{\em valid for: COP8} + +Both instructions allocate memory and ensure compatibility to ASMCOP from +National. While \tty{DSB} takes the argument as byte count, \tty{DSW} +uses it as word count (thus it allocates twice as much memory as +\tty{DSB}).
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{DS16} +\ttindex{DS16} + +{\em valid for: SC144xx} + +This instruction reserves memory in steps of full words, i.e. 16 bits. It +is an alias for {\tt DW}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{ALIGN} +\ttindex{ALIGN} + +{\em valid for: all processors} + +Takes the argument to align the program counter to a certain address +boundary. AS increments the program counter to the next multiple of the +argument. So, \tty{ALIGN} corresponds to \tty{DS.x} on 68000, but is much +more flexible at the same time. + +Example: +\begin{verbatim} + align 2 +\end{verbatim} +aligns to an even address (PC mod 2 = 0). The contents of the +skipped addresses are left undefined. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{LTORG} +\ttindex{LTORG} + +{\em valid for: SH7x00} + +Although the SH7000 processor can do an immediate register load with +8 bit only, AS shows up with no such restriction. This behaviour is +instead simulated through constants in memory. Storing them in +the code segment (not far away from the register load instruction) +would require an additional jump. AS therefore gathers the constants +and stores them at an address specified by \tty{LTORG}. Details are +explained in the processor-specific section somewhat later. + +%%--------------------------------------------------------------------------- + +\section{Macro Instructions} + +{\em valid for: all processors} + +Now we finally reach the things that make a macro assembler different +from an ordinary assembler: the ability to define macros (guessed +it !?). + +When speaking about 'macros', I generally mean a sequence of (machine +or pseudo) instructions which are united to a block by special +statements and can then be treated in certain ways.
The assembler +knows the following statements to work with such blocks: + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{MACRO} +\ttindex{MACRO}\ttindex{ENDM} + +is probably the most important instruction for macro programming. +The instruction sequence +\begin{verbatim} +<name> MACRO [parameter list] + <instructions> + ENDM +\end{verbatim} +defines the macro \tty{$<$name$>$} to be the enclosed instruction sequence. +This definition by itself does not generate any code! In turn, from +now on the instruction sequence can simply be called by the name, the +whole construct therefore shortens and simplifies programs. A +parameter list may be added to the macro definition to make things +even more useful. The parameters' names have to be separated by +commas (as usual) and have to conform to the conventions for symbol +names (see section \ref{SectSymConv}) - like the macro name itself. + +A switch to case-sensitive mode influences both macro names and +parameters. + +Similar to symbols, macros are local, i.e. they are only known in a +section and its subsections when the definition is done from within +a section. This behaviour however can be controlled in wide limits +via the options \tty{PUBLIC} and \tty{GLOBAL} described below. + +Apart from the macro parameters themselves, the parameter list may +contain control parameters which influence the processing of the +macro. These parameters are distinguished from normal parameters by +being enclosed in braces. The following control parameters are +defined: +\begin{itemize} +\item{\tty{EXPAND/NOEXPAND}: rule whether the enclosed code shall + be written to the listing when the macro is expanded. The + default is the value set by the pseudo instruction \tty{MACEXP}.} +\item{\tty{PUBLIC[:section name]}: assigns the macro to a parent section + instead of the current section. A section can make macros + accessible for the outer code this way.
If the section + specification is missing, the macro becomes completely global, i.e. + it may be referenced from everywhere.} +\item{\tty{GLOBAL[:section name]}: rules that in addition to the macro + itself, another macro shall be generated that has the same contents + but is assigned to the specified section. Its name is constructed by + concatenating the current section's name to the macro name. The + section specified must be a parent section of the current section; + if the specification is missing, the additional macro becomes + globally visible. For example, if a macro \tty{A} is defined in a + section \tty{B} that is a child section of section \tty{C}, an additional + global macro named \tty{C\_B\_A} would be generated. In contrast, if + \tty{C} had been specified as target section, the macro would be named \tty{B\_A} + and be assigned to section \tty{C}. This option is turned off by default + and it only has an effect when it is used from within a section. + The macro defined locally is not influenced by this option.} +\item{\tty{EXPORT/NOEXPORT}: rules whether the definition of this macro + shall be written to a separate file in case the \tty{-M} command line + option was given. This way, definitions of 'private' macros may + be mapped out selectively. The default is FALSE, i.e. the + definition will not be written to the file. The macro will be + written with the concatenated name if the \tty{GLOBAL} option was + additionally present.} +\end{itemize} +The control parameters described above are removed from the parameter +list by AS, i.e. they do not have a further influence on processing +and usage. + +When a macro is called, the parameters given for the call are +textually inserted into the instruction block and the resulting +assembler code is assembled as usual. Zero length parameters are +inserted in case too few parameters are specified. It is important +to note that string constants are not protected from macro +expansions. 
The old IBM rule: +\begin{quote}{\it + It's not a bug, it's a feature! +}\end{quote} +applies for this detail. The gap was left to allow checking of +parameters via string comparisons. For example, one can analyze a +macro parameter in the following way: +\begin{verbatim} +mul MACRO para,parb + IF UpString("PARA")<>"A" + MOV a,para + ENDIF + IF UpString("PARB")<>"B" + MOV b,parb + ENDIF + mul ab + ENDM +\end{verbatim} +It is important for the example above that the assembler converts all +parameter names to upper case when operating in case-insensitive +mode, but this conversion never takes place inside of string constants. +Macro parameter names therefore have to be written in upper case when +they appear in string constants. + +The same naming rules as for usual symbols also apply for macro +parameters, with the exception that only letters and numbers are +allowed, i.e. dots and underscores are forbidden. This constraint +has its reason in a hidden feature: the underscore allows to +concatenate macro parameter names to a symbol, like in the following +example: +\begin{verbatim} +concat macro part1,part2 + call part1_part2 + endm +\end{verbatim} +The call +\begin{verbatim} + concat module,function +\end{verbatim} +will therefore result in +\begin{verbatim} + call module_function +\end{verbatim} +A small example to remove all clarities ;-) + +A programmer braindamaged by years of programming Intel processors +wants to have the instructions \tty{PUSH/POP} also for the 68000. He +solves the 'problem' in the following way: +\begin{verbatim} +push macro op + move op,-(sp) + endm + +pop macro op + move (sp)+,op + endm +\end{verbatim} +If one writes +\begin{verbatim} + push d0 + pop a2 , +\end{verbatim} +this results in +\begin{verbatim} + move.w d0,-(sp) + move.w (sp)+,a2 +\end{verbatim} +A macro definition must not cross include file boundaries. 
+ +Labels defined in macros always are regarded as being local, an explicit +\tty{LOCAL} instruction is therefore not necessary (it does not +even exist). In case there is a reason to make a label global, one may define +it with \tty{LABEL} which always creates global symbols (similar to \tty{BIT, +SFR...}): +\begin{verbatim} + label $ +\end{verbatim} +When parsing a line, the assembler first checks the macro list and +afterwards looks for processor instructions, which is why macros +allow to redefine processor instructions. However, the definition +should appear prior to the first invocation of the instruction +to avoid phase errors like in the following example: +\begin{verbatim} + bsr target + +bsr macro targ + jsr targ + endm + + bsr target +\end{verbatim} +In the first pass, the macro is not known when the first \tty{BSR} +instruction is assembled; an instruction with 4 bytes of length is +generated. In the second pass however, the macro definition is +immediately available (from the first pass), a \tty{JSR} of 6 bytes length +is therefore generated. As a result, all labels following are too low +by 2 and phase errors occur for them. An additional pass is +necessary to resolve this. + +Because a machine or pseudo instruction becomes hidden when a macro +of same name is defined, there is a backdoor to reach the original +meaning: the search for macros is suppressed if the name is prefixed +with an exclamation mark (!). This may come in handy if one wants to +extend existing instructions in their functionality, e.g. the +TLCS-90's shift instructions: +\begin{verbatim} +srl macro op,n ; shift by n places + rept n ; n simple instructions + !srl op + endm + endm +\end{verbatim} +From now on, the \tty{SRL} instruction has an additional parameter...
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{IRP} +\ttindex{IRP} + +is a simplified macro definition for the case that an instruction sequence +shall be applied to a couple of operands and the code is not needed +any more afterwards. \tty{IRP} needs a symbol for the operand as its +first parameter, and an (almost) arbitrary number of parameters that are +sequentially inserted into the block of code. For example, one can write +\begin{verbatim} + irp op, acc,b,dpl,dph + push op + endm +\end{verbatim} +to push a couple of registers to the stack, which results in +\begin{verbatim} + push acc + push b + push dpl + push dph +\end{verbatim} +Again, labels used are automatically local for every pass. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{IRPC} +\ttindex{IRPC} + +\tty{IRPC} is a variant of \tty{IRP} where the first argument's occurrences +in the lines up to \tty{ENDM} are successively replaced by the characters +of a string instead of further parameters. For example, an especially +complicated way of placing a string into memory would be: +\begin{verbatim} + irpc char,"Hello World" + db 'CHAR' + endm +\end{verbatim} +\bb{CAUTION!} As the example already shows, \tty{IRPC} only inserts the +pure character; it is the programmer's task to assure that valid code +results (in this example by inserting quotes, including the detail that no +automatic conversion to uppercase characters is done). + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{REPT} +\ttindex{REPT} + +is the simplest way to employ macro constructs. The code between +\tty{REPT} and \tty{ENDM} is assembled as often as the integer argument of +\tty{REPT} specifies. This statement is commonly used in small loops to +replace a programmed loop to save the loop overhead.
+ +An example for the sake of completeness: +\begin{verbatim} + rept 3 + rr a + endm +\end{verbatim} +rotates the accumulator to the right by three digits. + +In case \tty{REPT}'s argument is equal to or smaller than 0, no expansion +at all is done. This is different to older versions of AS which used to +be a bit 'sloppy' in this respect and always made a single expansion. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{WHILE} +\ttindex{WHILE} + +\tty{WHILE} operates similarly to \tty{REPT}, but the fixed number of +repetitions given as an argument is replaced by a boolean expression. The +code framed by \tty{WHILE} and \tty{ENDM} is assembled until the +expression becomes logically false. This may mean in the extreme case +that the enclosed code is not assembled at all in case the expression was +already false when the construct was found. On the other hand, it may +happen that the expression stays true forever and AS will run +infinitely...one should therefore apply a bit of accuracy when one uses +this construct, i.e. the code must contain a statement that influences the +condition, e.g. like this: +\begin{verbatim} +cnt set 1 +sq set cnt*cnt + while sq<=1000 + dc.l sq +cnt set cnt+1 +sq set cnt*cnt + endm +\end{verbatim} +This example stores all square numbers up to 1000 to memory. + +Currently there exists a little ugly detail for \tty{WHILE}: an additional +empty line that was not present in the code itself is added after the last +expansion. This is a 'side effect' based on a weakness of the macro +processor and it is unfortunately not that easy to fix. I hope no one +minds... + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{EXITM} +\ttindex{EXITM} + +\tty{EXITM} offers a way to terminate a macro expansion or one of the +instructions \tty{REPT, IRP,} or \tty{WHILE} prematurely.
Such an option +helps for example to replace encapsulations with \tty{IF-ENDIF}-ladders in +macros by something more readable. Of course, an \tty{EXITM} itself +always has to be conditional, which leads us to an important detail: When +an \tty{EXITM} is executed, the stack of open \tty{IF} and +\tty{SWITCH} constructs is reset to the state it had just before the macro +expansion started. This is imperative for conditional \tty{EXITM}'s as +the \tty{ENDIF} resp. \tty{ENDCASE} that frames the \tty{EXITM} statement +will not be reached any more; AS would print an error message without this +trick. Please keep also in mind that \tty{EXITM} always only terminates +the innermost construct if macro constructs are nested! If one wants to +completely break out of a nested construct, one has to use additional +\tty{EXITM}'s on the higher levels! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FUNCTION} +\label{SectFUNCTION} +\ttindex{FUNCTION} + +Though \tty{FUNCTION} is not a macro statement in the inner sense, I will +describe this instruction at this place because it uses similar principles +like macro replacements. + +This instruction is used to define new functions that may then be +used in formula expressions like predefined functions. The +definition must have the following form: +\begin{verbatim} +<name> FUNCTION <arg>,..,<arg>,<expression> +\end{verbatim} +The arguments are the values that are 'fed into' the function. The +definition uses symbolic names for the arguments. The assembler +knows by this where to insert the actual values when the +function is called. This can be seen from the following example: +\begin{verbatim} +isdigit FUNCTION ch,(ch>='0')&&(ch<='9') +\end{verbatim} +This function checks whether the argument (interpreted as a character) is +a number in the currently valid character set (the character set can be +modified via \tty{CHARSET}, therefore the careful wording).
+ +The arguments' names (\tty{CH} in this case) must conform to the stricter +rules for macro parameter names, i.e. the special characters . and \_ +are not allowed. + +User-defined functions can be used in the same way as builtin +functions, i.e. with a list of parameters, separated by commas, +enclosed in parentheses: +\begin{verbatim} + IF isdigit(char) + message "\{char} is a number" + ELSEIF + message "\{char} is not a number" + ENDIF +\end{verbatim} +When the function is called, all parameters are calculated once and +are then inserted into the function's formula. This is done to +reduce calculation overhead and to avoid side effects. The +individual arguments have to be separated by commas when a function +has more than one parameter. + +\bb{CAUTION!} Similar to macros, one can use user-defined functions to +override builtin functions. This is a possible source for phase +errors. Such definitions therefore should be done before the first +call! + +The result's type may depend on the type of the input arguments as +the arguments are textually inserted into the function's formula. +For example, the function +\begin{verbatim} +double function x,x+x +\end{verbatim} +may have an integer, a float, or even a string as result, depending +on the argument's type! + +When AS operates in case-sensitive mode, the case matters when +defining or referencing user-defined functions, in contrast to +builtin functions! + +%%--------------------------------------------------------------------------- + +\section{Conditional Assembly} + +{\em valid for: all processors} + +The assembler supports conditional assembly with the help of statements +like \tty{IF...} resp. \tty{SWITCH...} . These statements work at +assembly time allowing or disallowing the assembly of program parts based +on conditions. 
They are therefore not to be compared with IF statements +of high-level languages (though it would be tempting to extend assembly +language with structurization statements of higher level languages...). + +The following constructs may be nested arbitrarily (until a memory +overflow occurs). + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{IF / ELSEIF / ENDIF} +\ttindex{IF} +\ttindex{ENDIF} +\ttindex{ELSEIF}\ttindex{ELSE} + +\tty{IF} is the most common and most versatile construct. The general +style of an \tty{IF} statement is as follows: +\begin{verbatim} + IF <expression 1> + . + . + <block 1> + . + . + ELSEIF <expression 2> + . + . + <block 2> + . + . + (possibly more ELSEIFs) + + . + . + ELSEIF + . + . + <block n> + . + . + ENDIF +\end{verbatim} +\tty{IF} serves as an entry, evaluates the first expression, and assembles +block 1 if the expression is true (i.e. not 0). All further +\tty{ELSEIF}-blocks will then be skipped. However, if the expression is +false, block 1 will be skipped and expression 2 is evaluated. If this +expression turns out to be true, block 2 is assembled. The number of +\tty{ELSEIF} parts is variable and results in an \tty{IF-THEN-ELSE} ladder +of an arbitrary length. The block assigned to the last \tty{ELSEIF} +(without argument) only gets assembled if all previous expressions +evaluated to false; it therefore forms a 'default' branch. It is +important to note that only \bb{one} of the blocks will be assembled: the +first one whose \tty{IF/ELSEIF} had a true expression as argument. + +The \tty{ELSEIF} parts are optional, i.e. \tty{IF} may directly be +followed by an \tty{ENDIF}. An \tty{ELSEIF} without parameters must be +the last branch. + +\tty{ELSEIF} always refers to the innermost, unfinished \tty{IF} construct +in case \tty{IF}'s are nested.
+ +\ttindex{IFDEF}\ttindex{IFNDEF}\ttindex{IFUSED}\ttindex{IFNUSED} +\ttindex{IFEXIST}\ttindex{IFNEXIST}\ttindex{IFB}\ttindex{IFNB} +In addition to \tty{IF}, the following further conditional statements are +defined: +\begin{itemize} +\item{\tty{IFDEF $<$symbol$>$}: true if the given symbol has been defined. + The definition has to appear before \tty{IFDEF}.} +\item{\tty{IFNDEF $<$symbol$>$}: counterpart to \tty{IFDEF}.} +\item{\tty{IFUSED $<$symbol$>$}: true if the given symbol has been + referenced at least once up to now.} +\item{\tty{IFNUSED $<$symbol$>$}: counterpart to \tty{IFUSED}.} +\item{\tty{IFEXIST $<$name$>$}: true if the given file exists. The same + rules for search paths and syntax apply as for the + \tty{INCLUDE} instruction (see section \ref{SectInclude}).} +\item{\tty{IFNEXIST $<$name$>$}: counterpart to \tty{IFEXIST}.} +\item{\tty{IFB $<$arg-list$>$}: true if all arguments of the parameter + list are empty strings.} +\item{\tty{IFNB $<$arg-list$>$}: counterpart to \tty{IFB}.} +\end{itemize} + +It is valid to write {\tt ELSE} instead of {\tt ELSEIF} since everybody +seems to be used to it... + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SWITCH / CASE / ELSECASE / ENDCASE} +\ttindex{SWITCH}\ttindex{CASE}\ttindex{ELSECASE}\ttindex{ENDCASE} + +\tty{CASE} is a special case of \tty{IF} and is designed for situations +when an expression has to be compared with a couple of values. This could +of course also be done with a series of \tty{ELSEIF}s, but the following +form +\begin{verbatim} + SWITCH <expression> + . + . + CASE <value 1> + . + <block 1> + . + CASE <value 2> + . + <block 2> + . + (further CASE blocks) + . + CASE <value n-1> + . + <block n-1> + . + ELSECASE + . + <block n> + . + ENDCASE +\end{verbatim} +has the advantage that the expression is only written once and also only +gets evaluated once. It is therefore less error-prone and slightly faster +than an \tty{IF} chain, but obviously not as flexible.
+ +It is possible to specify multiple values separated by commas to a +\tty{CASE} statement in order to assemble the following block in multiple +cases. The \tty{ELSECASE} branch again serves as a 'trap' for the case +that none of the \tty{CASE} conditions was met. AS will issue a warning +in case it is missing and all comparisons fail. + +Even when value lists of \tty{CASE} branches overlap, only \bb{one} branch +is executed, which is the first one in case of ambiguities. + +\tty{SWITCH} only serves to open the whole construct; an arbitrary number +of statements may be between \tty{SWITCH} and the first \tty{CASE} (but +don't leave other \tty{IF}s open!), for the sake of better readability +this should however not be done. + +%%--------------------------------------------------------------------------- + +\section{Listing Control} + +{\em valid for: all processors} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PAGE} +\ttindex{PAGE} + +\tty{PAGE} is used to tell AS the dimensions of the paper that is used to +print the assembly listing. The first parameter is thereby the +number of lines after which AS shall automatically output a form +feed. One should however take into account that this value does \bb{not} +include heading lines including an eventual line specified with +\tty{TITLE}. The minimum number of lines is 5, and the maximum value is +255. A specification of 0 has the result that AS will not do any form +feeds except those triggered by a \tty{NEWPAGE} instruction or those +implicitly engaged at the end of the assembly listing (e.g. prior to the +symbol table). 
+ +The specification of the listing's width in characters is an +optional second parameter and serves two purposes: on the one hand, +the internal line counter of AS will continue to run correctly when a +source line has to be split into several listing lines, and on +the other hand there are printers (like some laser printers) that do +not automatically wrap into a new line at line end but instead simply +discard the rest. For this reason, AS does line breaks by itself, +i.e. lines that are too long are split into chunks whose lengths are +equal to or smaller than the specified width. This may lead to +double line feeds on printers that can do line wraps on their own if +one specifies the exact line width as listing width. The solution +for such a case is to reduce the assembly listing's width by 1. The +specified line width may lie between 5 and 255 characters; a line +width of 0 means similarly to the page length that AS shall not do +any splitting of listing lines; lines that are too long of course +cannot be taken into account for the form feed then any more. + +The default setting for the page length is 60 lines, the default for the +line width is 0; the latter value is also assumed when \tty{PAGE} is +called with only one parameter. + +\bb{CAUTION!} There is no way for AS to check whether the specified +listing length and width correspond to the reality! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{NEWPAGE} +\ttindex{NEWPAGE} + +\tty{NEWPAGE} can be used to force a form feed though the current page is +not full up to now. This might be useful to separate program parts +in the listing that are logically different. The internal line +counter is reset and the page counter is incremented by one. The +optional parameter is in conjunction with a hierarchical page +numbering AS supports up to a chapter depth of 4. 0 always refers to +the lowest depth, and the maximum value may vary during the assembly +run.
This may look a bit puzzling, as the following example shows: +\begin{quote}\begin{tabbing} +\hspace{2.5cm} \= \hspace{4.5cm} \= \kill +page 1, \> instruction \tty{NEWPAGE 0} \> $\rightarrow$ page 2 \\ +page 2, \> instruction \tty{NEWPAGE 1} \> $\rightarrow$ page 2.1 \\ +page 2.1, \> instruction \tty{NEWPAGE 1} \> $\rightarrow$ page 3.1 \\ +page 3.1, \> instruction \tty{NEWPAGE 0} \> $\rightarrow$ page 3.2 \\ +page 3.2, \> instruction \tty{NEWPAGE 2} \> $\rightarrow$ page 4.1.1 \\ +\end{tabbing}\end{quote} +\tty{NEWPAGE $<$number$>$} may therefore result in +changes in different digits, depending on the current chapter depth. An +automatic form feed due to a line counter overflow or a \tty{NEWPAGE} +without parameter is equal to \tty{NEWPAGE 0}. Previous to the output of +the symbol table, an implicit \tty{NEWPAGE $<$maximum up to now$>$} is +done to start a new 'main chapter'. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{MACEXP} +\ttindex{MACEXP} + +One can achieve by the statement +\begin{verbatim} + macexp off +\end{verbatim} +that only the macro call and not the expanded text is listed for +macro expansions. This is sensible for macro intensive codes to +avoid that the listing grows beyond all bounds. The full listing can +be turned on again with a +\begin{verbatim} + macexp on . +\end{verbatim} +This is also the default. + +There is a subtle difference between the meaning of \tty{MACEXP} for +macros and for all other macro-like constructs (e.g. \tty{REPT}): while a +macro contains an internal flag that rules whether expansions of this macro +shall be listed or not, \tty{MACEXP} directly influences all other +constructs that are resolved 'in place'. The reason for this +differentiation is that there may be macros that are tested and their +expansion is therefore unnecessary, but all other macros still shall be +expanded.
\tty{MACEXP} serves as a default for the macro's internal flag +when it is defined, and it may be overridden by the \tty{NOEXPAND} resp. +\tty{EXPAND} directives. + +The current setting may be read from the symbol \tty{MACEXP}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{LISTING} +\ttindex{LISTING} + +works like \tty{MACEXP} and accepts the same parameters, but is much more +radical: After a +\begin{verbatim} + listing off , +\end{verbatim} +nothing at all will be written to the listing. This directive makes sense +for tested code parts or include files to avoid a paper consumption going +beyond all bounds. \bb{CAUTION!} If one forgets to issue the counterpart +somewhere later, even the symbol table will not be written any more! In +addition to \tty{ON} and \tty{OFF}, \tty{LISTING} also accepts +\tty{NOSKIPPED} and \tty{PURECODE} as arguments. Program parts that were +not assembled due to conditional assembly will not be written to the +listing when \tty{NOSKIPPED} is set, while \tty{PURECODE} - as the name +indicates - even suppresses the \tty{IF} directives themselves in the +listing. These options are useful if one uses macros that act differently +depending on parameters and one only wants to see the used parts in the +listing. + +The current setting may be read from the symbol \tty{LISTING} (0=\tty{OFF}, +1=\tty{ON}, 2=\tty{NOSKIPPED}, 3=\tty{PURECODE}). + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PRTINIT and PRTEXIT} +\ttindex{PRTINIT}\ttindex{PRTEXIT} + +Quite often it makes sense to switch to another printing mode (like +compressed printing) when the listing is sent to a printer and to +deactivate this mode again at the end of the listing. 
The output of +the needed control sequences can be automated with these instructions +if one specifies the sequence that shall be sent to the output device +prior to the listing with \tty{PRTINIT $<$string$>$} and similarly the +deinitialization string with \tty{PRTEXIT $<$string$>$}. +\tty{$<$string$>$} has to be a string expression in both cases. The syntax +rules for string constants allow to insert control characters into the +string without too much tweaking. + +When writing the listing, the assembler does \bb{not} differentiate where +the listing actually goes, i.e. printer control characters are sent to the +screen without mercy! + +Example: + +For Epson printers, it makes sense to switch them to compressed +printing because listings are so wide. The lines +\begin{verbatim} + prtinit "\15" + prtexit "\18" +\end{verbatim} +assure that the compressed mode is turned on at the beginning of the +listing and turned off afterwards. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{TITLE} +\ttindex{TITLE} + +The assembler normally adds a header line to each page of the listing +that contains the source file's name, date, and time. This +statement allows to extend the page header by an arbitrary additional +line. The string that has to be specified is an arbitrary string +expression. + +Example: + +For the Epson printer already mentioned above, a title line shall be +written in wide mode, which makes it necessary to turn off the +compressed mode before: +\begin{verbatim} + title "\18\14Wide Title\15" +\end{verbatim} +(Epson printers automatically turn off the wide mode at the end of a +line.) + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{RADIX} +\ttindex{RADIX} + +\tty{RADIX} with a numerical argument between 2 and 36 sets the default +numbering system for integer constants, i.e. the numbering system used if +nothing else has been stated explicitly. 
The default is 10, and there are +some possible pitfalls to keep in mind which are described in section +\ref{SectIntConsts}. + +Independent of the current setting, the argument of {\tt RADIX} is {\em +always decimal}; furthermore, no symbolic or formula expressions may be +used as argument. Only use simple constant numbers! + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{OUTRADIX} +\ttindex{OUTRADIX} + +\tty{OUTRADIX} can in a certain way be regarded as the opposite to +\tty{RADIX}: This statement allows to configure which numbering system to +use for integer results when \verb!\{...}! constructs are used in string +constants (see section \ref{SectStringConsts}). Valid arguments range +again from 2 to 36, while the default is 16. + +%%--------------------------------------------------------------------------- + +\section{Local Symbols} +\label{ChapLocSyms} + +{\em valid for: all processors} + +Local symbols and the section concept introduced with them are a +completely new function that was introduced with version 1.39. One +could say that this part is version ''1.0'' and therefore probably not +the optimum. Ideas and (constructive) criticism are therefore +especially wanted. I admittedly described the usage of sections how +I imagined it. It is therefore possible that the reality is not +entirely equal to the model in my head. I promise that in case of +discrepancies, changes will occur that the reality gets adapted to +the documentation and not vice versa (I was told that the latter +sometimes takes place in larger companies...). + +AS does not generate linkable code (and this will probably not change +in the near future \tty{:-(}). This fact forces one to always assemble a +program as a whole.
In contrast to this technique, a separation into +linkable modules would have several advantages: +\begin{itemize} +\item{shorter assembly times as only the modified modules have to be + reassembled;} +\item{the option to set up defined interfaces among modules by definition + of private and public symbols;} +\item{the smaller length of the individual modules reduces the number of + symbols per module and therefore allows to use shorter symbol names + that are still unique.} +\end{itemize} +Especially the last item was something that always nagged me: once +there was a label's name defined at the beginning of a 2000-lines +program, there was no way to reuse it somehow - not even at the +file's other end where routines with a completely different context +were placed. I was forced to use concatenated names in the style of +\begin{verbatim} + <subprogram name>_<symbol name> +\end{verbatim} +that had lengths ranging from 15 to 25 characters and made the +program difficult to overlook. The concept of sections described in +detail in the following text was designed to cure at least the second +and third item of the list above. It is completely optional: if you +do not want to use sections, simply forget them and continue to work +like you did with previous versions of AS. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{Basic Definition (SECTION/ENDSECTION)} +\ttindex{SECTION}\ttindex{ENDSECTION} + +A section represents a part of the assembler program enclosed by +special statements and has a unique name chosen by the programmer: +\begin{verbatim} + . + . + + . + . + SECTION <section's name> + . + . + + . + . + ENDSECTION [section's name] + . + . + + . + . +\end{verbatim} +The name of a section must conform to the conventions for a symbol +name; AS stores section and symbol names in separate tables which is +the reason why a name may be used for a symbol and a section at the +same time.
Section names must be unique in a sense that there must +not be more than one section on the same level with the same name (I +will explain in the next part what ''levels'' mean). The argument of +\tty{ENDSECTION} is optional, it may also be omitted; if it is omitted, AS +will show the section's name that has been closed with this +\tty{ENDSECTION}. Code inside a section will be processed by AS exactly +as if it were outside, except for three decisive differences: +\begin{itemize} +\item{Symbols defined within a section additionally get an internally + generated number that corresponds to the section. These symbols + are not accessible by code outside the section (this can be + changed by pseudo instructions, later more about this).} +\item{The additional attribute allows to define symbols of the same + name inside and outside the section; the attribute makes it + possible to use a symbol name multiple times without getting error + messages from AS.} +\item{If a symbol of a certain name has been defined inside and outside + of a section, the ''local'' one will be preferred inside the + section, i.e. AS first searches the symbol table for a symbol of + the referenced name that also was assigned to the section. A + search for a global symbol of this name only takes place if the + first search fails.} +\end{itemize} +This mechanism e.g. allows to split the code into modules as one +might have done it with linkable code. A more fine-grained approach +would be to pack every routine into a separate section. Depending on +the individual routines' lengths, the symbols for internal use may +obtain very short names. + +AS will by default not differentiate between upper and lower case in +section names; if one however switches to case-sensitive mode, the +case will be regarded just like for symbols. + +The organization described up to now roughly corresponds to what is +possible in the C language that places all functions on the same +level. 
However, as my ''high-level'' ideal was Pascal and not C, I +went one step further: + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{Nesting and Scope Rules} + +It is valid to define further sections within a section. This is +analogous to the option given in Pascal to define procedures inside a +procedure or function. The following example shows this: +\begin{verbatim} +sym EQU 0 + + SECTION ModuleA + + SECTION ProcA1 + +sym EQU 5 + + ENDSECTION ProcA1 + + SECTION ProcA2 + +sym EQU 10 + + ENDSECTION ProcA2 + + ENDSECTION ModuleA + + + SECTION ModuleB + +sym EQU 15 + + SECTION ProcB + + ENDSECTION ProcB + + ENDSECTION ModuleB +\end{verbatim} +When looking up a symbol, AS first searches for a symbol assigned to +the current section, and afterwards traverses the list of parent +sections until the global symbols are reached. In our example, the +individual sections see the values given in table \ref{TabSymErg} for +the symbol \tty{sym}: +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|} +\hline +section & value & from section... \\ +\hline +\hline +Global & 0 & Global \\ +\hline +\tty{ModuleA} & 0 & Global \\ +\hline +\tty{ProcA1} & 5 & \tty{ProcA1} \\ +\hline +\tty{ProcA2} & 10 & \tty{ProcA2} \\ +\hline +\tty{ModuleB} & 15 & \tty{ModuleB} \\ +\hline +\tty{ProcB} & 15 & \tty{ModuleB} \\ +\hline +\end{tabular}\end{center} +\caption{Valid values for the Individual Sections\label{TabSymErg}} +\end{table*} +This rule can be overridden by explicitly appending a section's name +to the symbol's name. The section's name has to be enclosed in +brackets: +\begin{verbatim} + move.l #sym[ModuleB],d0 +\end{verbatim} +Only sections that are in the parent section path of the current +section may be used. The special values \tty{PARENT0..PARENT9} are allowed +to reference the n-th ''parent'' of the current section; \tty{PARENT0} is +therefore equivalent to the current section itself, \tty{PARENT1} the +direct parent and so on.
\tty{PARENT1} may be abbreviated as \tty{PARENT}. If +no name is given between the brackets, like in this example: +\begin{verbatim} + move.l #sym[],d0 , +\end{verbatim} +one reaches the global symbol. \bb{CAUTION!} If one explicitly +references a symbol from a certain section, AS will only look for +symbols from this section, i.e. the traversal of the parent sections +path is omitted! + +Similar to Pascal, it is allowed that different sections have +subsections of the same name; the principle of locality avoids +irritations. One should IMHO still use this feature as seldom as +possible: Symbols listed in the symbol resp. cross reference list are +only marked with the section they are assigned to, not with the +''section hierarchy'' lying above them (this really would have busted +the available space); a differentiation is made very difficult this +way. + +As a \tty{SECTION} instruction does not define a label by itself, the +section concept has an important difference to Pascal's concept of +nested procedures: a Pascal procedure can automatically ''see'' its +subprocedures (functions), whereas AS requires an explicit definition of an +entry point. This can be done e.g. with the following macro pair: +\begin{verbatim} +proc MACRO name + SECTION name +name LABEL $ + ENDM + +endp MACRO name + ENDSECTION name + ENDM +\end{verbatim} +This example also shows that the locality of labels inside macros +is not influenced by sections. It makes the trick with the \tty{LABEL} +instruction necessary. + +This does of course not solve the problem completely. The label is +still local and not referenceable from the outside.
Those who think +that it would suffice to place the label in front of the \tty{SECTION} +statement should be quiet because they would spoil the bridge to the +next theme: + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{PUBLIC and GLOBAL} +\ttindex{PUBLIC}\ttindex{GLOBAL} + +The \tty{PUBLIC} statement allows to change the assignment of a symbol to +a certain section. It is possible to treat multiple symbols with one +statement, but I will use an example with only one symbol in the following +(not hurting the generality of this discussion). In the simplest case, +one declares a symbol to be global, i.e. it can be referenced from +anywhere in the program: +\begin{verbatim} + PUBLIC <name> +\end{verbatim} +As a symbol cannot be moved in the symbol table once it has been sorted +in, this statement has to appear \bb{before} the symbol itself is +defined. AS stores all \tty{PUBLICs} in a list and removes an entry from +this list when the corresponding symbol is defined. AS prints errors at +the end of a section in case that not all \tty{PUBLICs} have been +resolved. + +Regarding the hierarchical section concept, the method of defining a +symbol as purely global looks extremely crude. There is fortunately +a way to do this in a bit more differentiated way: by appending a +section name: +\begin{verbatim} + PUBLIC <name>:<section>
+\end{verbatim} +The symbol will be assigned to the referenced section and therefore also +becomes accessible for all its subsections (except they define a symbol of +the same name that hides the ''more global'' symbol). AS will naturally +protest if several subsections try to export a symbol of same name to the +same level. The special \tty{PARENTn} values mentioned in the previous +section are also valid for \tty{$<$section$>$} to export a symbol exactly +\tty{n} levels up in the section hierarchy. Otherwise only sections that +are parent sections of the current section are valid for +\tty{$<$section$>$}. Sections that are in another part of the section +tree are not allowed. If several sections in the parent section path +should have the same name (this is possible), the lowest level will be +taken. + +This tool lets the abovementioned macro become useful: +\begin{verbatim} +proc MACRO name + SECTION name + PUBLIC name:PARENT +name LABEL $ + ENDM +\end{verbatim} +This setting is equal to the Pascal model that also only allows the +''father'' to see its children, but not the ''grandpa''. + +AS will quarrel about double-defined symbols if more than one section +attempts to export a symbol of a certain name to the same upper section. +This is by itself a correct reaction, and one needs to ''qualify'' symbols +somehow to make them distinguishable if these exports were deliberate. A +\tty{GLOBAL} statement does just this. The syntax of \tty{GLOBAL} is +identical to \tty{PUBLIC}, but the symbol stays local instead of being +assigned to a higher section. Instead, an additional symbol of the same +value but with the subsection's name appended to the symbol's name is +created, and only this symbol is made public according to the section +specification. If for example two sections \tty{A} and \tty{B} both +define a symbol named \tty{SYM} and export it with a \tty{GLOBAL} +statement to their parent section, the symbols are sorted in under the +names \tty{A\_SYM} resp. 
\tty{B\_SYM} . + +In case that source and target section are separated by more than one +level, the complete name path is prepended to the symbol name. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{FORWARD} +\ttindex{FORWARD} + +The model described so far may look beautiful, but there is an +additional detail not present in Pascal that may spoil the happiness: +Assembler allows forward references. Forward references may lead to +situations where AS accesses a symbol from a higher section in the +first pass. This is not a disaster by itself as long as the correct +symbol is used in the second pass, but accidents of the following +type may happen: +\begin{verbatim} +loop: . + <code> + . + . + SECTION sub + . ; *** + . + bra.s loop + . + . +loop: . + . + ENDSECTION + . + . + jmp loop ; main loop +\end{verbatim} +AS will take the global label \tty{loop} in the first pass and will +quarrel about an out-of-range branch if the program part at +\tty{$<$code$>$} is long enough. The second pass will not be +started at all. One way to avoid the ambiguity would be to +explicitly specify the symbol's section: +\begin{verbatim} + bra.s loop[sub] +\end{verbatim} +If a local symbol is referenced several times, the brackets can be saved +by using a \tty{FORWARD} statement. The symbol is thereby explicitly +announced to be local, and AS will only look in the local symbol table +part when this symbol is referenced. For our example, the statement +\begin{verbatim} + FORWARD loop +\end{verbatim} +should be placed at the position marked with \tty{***}. + +\tty{FORWARD} must not only be stated prior to a symbol's definition, but +also prior to its first usage in a section to make sense. It does not +make sense to define a symbol private and public; this will be regarded as +an error by AS.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{Performance Aspects} + +The multi-stage lookup in the symbol table and the decision to which +section a symbol shall be assigned of course cost a bit of time to +compute. An 8086 program of 1800 lines length for example took 34.5 +instead of 33 seconds after a modification to use sections (80386 SX, +16MHz, 3 passes). The overhead is therefore limited. As it has +already been stated at the beginning, it is up to the programmer if +(s)he wants to accept it. One can still use AS without sections. + +%%--------------------------------------------------------------------------- + +\section{Miscellaneous} + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{SHARED} +\label{ChapShareOrder} +\ttindex{SHARED} + +{\em valid for: all processors} + +This statement instructs AS to write the symbols given in the +parameter list (regardless if they are integer, float or string +symbols) together with their values into the share file. It depends +upon the command line parameters described in section +\ref{SectCallConvention} whether such a file is generated at all and in +which format it is written. If AS detects this instruction and no share +file is generated, a warning is the result. + +\bb{CAUTION!} A comment possibly appended to the statement itself will be +copied to the first line outputted to the share file (if \tty{SHARED}'s +argument list is empty, only the comment will be written). In case a +share file is written in C or Pascal format, one has to ensure that +the comment itself does not contain character sequences that close +the comment (''*/'' resp. ''*)''). AS does not check for this!
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{INCLUDE} +\label{SectInclude} +\ttindex{INCLUDE} + +{\em valid for: all processors} + +This instruction inserts the file given as a parameter into the source just as +if it had been inserted with an editor (the file name may +optionally be enclosed with '' characters). This instruction is +useful to split source files that would otherwise not fit into the +editor or to create ''tool boxes''. + +In case that the file name does not have an extension, it will +automatically be extended with \tty{INC}. + +Via the \tty{-i $<$path list$>$} option, one can specify a list of +directories that will automatically be searched for the file. If the +file is not found, a \bb{fatal} error occurs, i.e. assembly terminates +immediately. + +For compatibility reasons, it is valid to enclose the file name in '' +characters, i.e. +\begin{verbatim} + include stddef51 +\end{verbatim} +and +\begin{verbatim} + include "stddef51.inc" +\end{verbatim} +are equivalent. \bb{CAUTION!} This freedom of choice is the reason why +only a string constant but no string expression is allowed! + +The search list is ignored if the file name itself contains a path +specification. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{BINCLUDE} +\ttindex{BINCLUDE} + +{\em valid for: all processors} + +\tty{BINCLUDE} can be used to embed binary data generated by other programs +into the code generated by AS (this might theoretically even be code +created by AS itself...). \tty{BINCLUDE} has three forms: +\begin{verbatim} + BINCLUDE <file> +\end{verbatim} +This way, the file is completely included. +\begin{verbatim} + BINCLUDE <file>,<offset> +\end{verbatim} +This way, the file's contents are included starting at \tty{$<$offset$>$} up to +the file's end. +\begin{verbatim} + BINCLUDE <file>,<offset>,<length> +\end{verbatim} +This way, \tty{$<$length$>$} bytes are included starting at +\tty{$<$offset$>$}.
+ +The same rules regarding search paths apply as for \tty{INCLUDE}. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{MESSAGE, WARNING, ERROR, and FATAL} +\ttindex{MESSAGE}\ttindex{WARNING}\ttindex{ERROR}\ttindex{FATAL} +{\em valid for: all processors} + +Though the assembler checks source files as strictly as possible and +delivers differentiated error messages, it might be necessary from +time to time to issue additional error messages that allow an +automatic check for logical errors. The assembler distinguishes +among three different types of error messages that are accessible to +the programmer via the following three instructions: +\begin{itemize} +\item{\tty{WARNING}: Errors that hint at possibly wrong or inefficient + code. Assembly continues and a code file is generated.} +\item{\tty{ERROR}: True errors in a program. Assembly continues to + allow detection of possible further errors in the same pass. + A code file is not generated.} +\item{\tty{FATAL}: Serious errors that force an immediate termination + of assembly. A code file may be generated but will be incomplete.} +\end{itemize} +All three instructions have the same format for the message that shall +be issued: an arbitrary (possibly computed?!) string expression which +may therefore be either a constant or variable. + +These instructions generally only make sense in conjunction with +conditional assembly. For example, if there is only a limited +address space for a program, one can test for overflow in the +following way: +\begin{verbatim} +ROMSize equ 8000h ; 27256 EPROM + +ProgStart: + . + . + + . + . +ProgEnd: + + if ProgEnd-ProgStart>ROMSize + error "\athe program is too long!" + endif +\end{verbatim} +Apart from the instructions generating errors, there is also an +instruction \tty{MESSAGE} that simply prints a message to the console +resp. to the assembly listing. Its usage is equal to the other three +instructions.
+ +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{READ} +\ttindex{READ} + +{\em valid for: all processors} + +One could say that \tty{READ} is the counterpart to the previous +instruction group: it allows to read values from the keyboard during +assembly. You might ask what this is good for. I will break with +the previous principles and put an example before the exact +description to outline the usefulness of this instruction: + +A program needs for data transfers a buffer of a size that should be +set at assembly time. One could store this size in a symbol defined +with \tty{EQU}, but it can also be done interactively with \tty{READ}: +\begin{verbatim} + IF MomPass=1 + READ "buffer size",BufferSize + ENDIF +\end{verbatim} +Programs can this way configure themselves dynamically during assembly +and one could hand over the source to someone who can assemble it +without having to dive into the source code. The \tty{IF} conditional +shown in the example should always be used to avoid bothering the +user multiple times with questions. + +\tty{READ} is quite similar to \tty{SET} with the difference that the +value is read from the keyboard instead of the instruction's arguments. +This for example also implies that AS will automatically set the symbol's +type (integer, float or string) or that it is valid to enter formula +expressions instead of a simple constant. + +\tty{READ} may either have one or two parameters because the prompting +message is optional. AS will print a message constructed from the +symbol's name if it is omitted. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{RELAXED} +\label{SectRELAXED} +\ttindex{RELAXED} + +{\em valid for: all processors} + +By default, AS assigns a distinct syntax for integer constants to a +processor family (which is in general equal to the manufacturer's +specifications, as long as the syntax is not too bizarre...). 
+Everyone however has his own preferences for another syntax and may +well live with the fact that his programs cannot be translated any +more with the standard assembler. If one places the instruction +\begin{verbatim} + RELAXED ON +\end{verbatim} +right at the program's beginning, one may from then on use any syntax +for integer constants, even mixed in a program. AS tries to guess +automatically for every expression the syntax that was used. This +automatism does not always deliver the result one might have in mind, +and this is also the reason why this option has to be enabled +explicitly: if there are no prefixes or postfixes that unambiguously +identify either Intel or Motorola syntax, the C mode will be used. +Leading zeroes that are superfluous in other modes have a meaning in +this mode: +\begin{verbatim} + move.b #08,d0 +\end{verbatim} +This constant will be understood as an octal constant and will result +in an error message as octal numbers may only contain digits from 0 +to 7. One might call this a lucky case; a number like 077 would +result in trouble without getting a message about this. Without the +relaxed mode, both expressions unambiguously would have been +identified as decimal constants. + +The current setting may be read from a symbol with the same name. + +%%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +\subsection{END} +\ttindex{END} + +{\em valid for: all processors} + +\tty{END} marks the end of an assembler program. Lines that possibly +follow in the source file will be ignored. \bb{IMPORTANT:} \tty{END} may +be called from within a macro, but the \tty{IF}-stack for conditional +assembly is not cleared automatically. The following construct therefore +results in an error: +\begin{verbatim} + IF DontWantAnymore + END + ELSEIF +\end{verbatim} +\tty{END} may optionally have an integer expression as argument that marks +the program's entry point.
AS stores this in the code file with a special +record and it may be post-processed e.g. with P2HEX. + +\tty{END} has always been a valid instruction for AS, but the only reason +for this in earlier releases of AS was compatibility; \tty{END} had no +effect. + +%%=========================================================================== + +\cleardoublepage +\chapter{Processor-specific Hints} + +When writing the individual code generators, I strived for a maximum +amount of compatibility to the original assemblers. However, I only did this +as long as it did not mean an unacceptable additional amount of work. +I listed important differences, details and pitfalls in the following +chapter. + +%%--------------------------------------------------------------------------- + +\section{6811} + +''Where can I buy such a beast, a HC11 in NMOS?'', some of you might +ask. Well, of course it does not exist, but an H cannot be +represented in a hexadecimal number (older versions of AS would not +have accepted such a name because of this), and so I decided to omit +all the letters... +\par +\begin{quote}{\it +''Someone stating that something is impossible should be at least as + cooperative as not to hinder the one who currently does it.'' +}\end{quote} +From time to time, one is forced to revise one's opinions. Some versions +earlier, I stated at this place that I couldn't use AS's parser in a way +that it is also possible to separate the arguments of \tty{BSET/BCLR} +resp. \tty{BRSET/BRCLR} with spaces. However, it seems that it can do +more than I wanted to believe...after the n+1th request, I sat down once +again to work on it and things seem to work now.
You may use either +spaces or commas, but not in all variants, to avoid ambiguities: for +every variant of an instruction, it is possible to use only commas or a +mixture of spaces and commas as Motorola seems to have defined it (their +data books do not always have the quality of the corresponding +hardware...): +\begin{verbatim} + Bxxx abs8 #mask is equal to Bxxx abs8,#mask + Bxxx disp8,X #mask is equal to Bxxx disp8,X,#mask + BRxxx abs8 #mask addr is equal to BRxxx abs8,#mask,addr + BRxxx disp8,X #mask addr is equal to BRxxx disp8,X,#mask,addr +\end{verbatim} +In this list, \tty{xxx} is a synonym either for \tty{SET} or \tty{CLR}; +\tty{\#mask} is the bit mask to be applied (the \# sign is optional). Of +course, the same statements are also valid for Y-indexed expressions (not +listed here). + +%%--------------------------------------------------------------------------- + +\section{PowerPC} + +Of course, it is a slightly crazy idea to add support in AS for a +processor that was mostly designed for usage in work stations. +Remember that AS mainly is targeted at programmers of single board +computers. But things that today represent the absolute high end in +computing will be average tomorrow and maybe obsolete the next day, +and in the meantime, the Z80 as well as the 8088 have been retired as CPUs +for personal computers and been moved to the embedded market; +modified versions are marketed as microcontrollers. With the +appearance of the MPC505 and PPC403, my suspicion has proven to be +true that IBM and Motorola try to promote this architecture in as +many fields as possible. + +However, the current support is a bit incomplete: Temporarily, the +Intel-style mnemonics are used to allow storage of data and the more +uncommon RS/6000 machine instructions mentioned in \cite{Mot601} are +missing (hopefully no one misses them!). I will finish this as soon +as information about them is available!
+ +%%--------------------------------------------------------------------------- + +\section{DSP56xxx} + +Motorola, which devil rode you! Which person in your company had the +''brilliant'' idea to separate the parallel data transfers with spaces! +In result, everyone who wants to make his code a bit more readable, +e.g. like this: +\begin{verbatim} + move x:var9 ,r0 + move y:var10,r3 , +\end{verbatim} +is p****ed because the space gets recognized as a separator for +parallel data transfers! + +Well...Motorola defined it that way, and I cannot change it. Using +tabs instead of spaces to separate the parallel operations is also +allowed, and the individual operations' parts are again separated +with commas, as one would expect it. + +\cite{Mot56} states that instead of using \tty{MOVEC, MOVEM, ANDI} or +\tty{ORI}, it is also valid to use the more general Mnemonics \tty{MODE, +AND} or \tty{OR}. +AS (currently) does not support this. + +%%--------------------------------------------------------------------------- + +\section{H8/300} + +Regarding the assembler syntax of these processors, Hitachi generously +copied from Motorola (that wasn't by far the worst choice...), +unfortunately the company wanted to introduce its own format for +hexadecimal numbers. To make it even worse, it is a format that uses +unbalanced single quotes, just like Microchip does. This is something I +could not (I even did not want to) reproduce with AS, as AS uses single +quotes to surround ASCII character sequences. Instead, one has to write +hexadecimal numbers in the well-known Motorola syntax: with a leading +dollar sign. + +%%--------------------------------------------------------------------------- + +\section{SH7000/7600/7700} + +Unfortunately, Hitachi once again used their own format for +hexadecimal numbers, and once again I was not able to reproduce this +with AS...please use Motorola syntax! 
+ +When using literals and the \tty{LTORG} instruction, a few things have to +be kept in mind if you do not want to suddenly get confronted with strange +error messages: + +Literals exist due to the fact that the processor is unable to load +constants out of a range of -128 to 127 with immediate addressing. +AS (and the Hitachi assembler) hide this inability by the automatic +placement of constants in memory which are then referenced via +PC-relative addressing. The question that now arises is where to +locate these constants in memory. AS does not automatically place a +constant in memory when it is needed; instead, they are collected +until an LTORG instruction occurs. The collected constants are then +dumped en bloc, and their addresses are stored in ordinary labels +which are also visible in the symbol table. Such a label's name is +of the form +\begin{verbatim} + LITERAL_s_xxxx_n . +\end{verbatim} +In this name, \tty{s} represents the literal's type. Possible values are +\tty{W} for 16-bit constants, \tty{L} for 32-bit constants and \tty{F} for +forward references where AS cannot decide in anticipation which size is +needed. In case of \tty{s=W} or \tty{L}, \tty{xxxx} denotes the +constant's value in a hexadecimal notation, whereas \tty{xxxx} is a simple +running number for forward references (in a forward reference, one does +not know the value of a constant when it is referenced, so one obviously +cannot incorporate its value into the name). \tty{n} is a counter that +signifies how often a literal of this value previously occurred in the +current section. Literals follow the standard rules for localization by +sections. It is therefore absolutely necessary to place literals that +were generated in a certain section before the section is terminated! + +The numbering with \tty{n} is necessary because a literal may occur +multiple times in a section. 
One reason for this situation is that +PC-relative addressing only allows positive offsets; Literals that +have once been placed with an \tty{LTORG} can therefore not be referenced +in the code that follows. The other reason is that the displacement +is generally limited in length (512 resp. 1024 bytes). + +An automatic \tty{LTORG} at the end of a program or previously to +switching to a different target CPU does not occur; if AS detects unplaced +literals in such a situation, an error message is printed. + +As the PC-relative addressing mode uses the address of the current +instruction plus 4, it is not possible to access a literal that is +stored directly after the instruction, like in the following example: +\begin{verbatim} + mov #$1234,r6 + ltorg +\end{verbatim} +This is a minor item since the CPU anyway would try to execute the +following data as code. Such a situation should not occur in a real +program...another pitfall is far more real: if PC-relative addressing +occurs just behind a delayed branch, the program counter is already +set to the destination address, and the displacement is computed +relative to the branch target plus 2. Following is an example where +this detail leads to a literal that cannot be addressed: +\begin{verbatim} + bra Target + mov #$12345678,r4 ; is executed + . + . + ltorg ; here is the literal + . + . +Target: mov r4,r7 ; execution continues here +\end{verbatim} +As \tty{Target}+2 is on an address behind the literal, a negative +displacement would result. Things become especially hairy when one +of the branch instructions \tty{JMP, JSR, BRAF, or BSRF} is used: as AS +cannot calculate the target address (it is generated at runtime from +a register's contents), a PC value is assumed that should never fit, +effectively disabling any PC-relative addressing at this point. + +It is not possible to deduce the memory usage from the count and size +of literals. 
AS might need to insert a padding word to align a long +word to an address that is evenly divisible by 4; on the other hand, +AS might reuse parts of a 32-bit literal for other 16-bit literals. +Of course multiple use of a literal with a certain value will create +only one entry. However, such optimizations are completely +suppressed for forward references as AS does not know anything about +their value. + +As literals use the PC-relative addressing which is only allowed for +the \tty{MOV} instruction, the usage of literals is also limited to +\tty{MOV} instructions. The way AS uses the operand size is a bit tricky: +A specification of a byte or word move means to generate the shortest +possible instruction that results in the desired value placed in the +register's lowest 8 resp. 16 bits. The upper 24 resp. 16 bits are treated +as ''don't care''. However, if one specifies a longword move or omits the +size specification completely, this means that the complete 32-bit +register should contain the desired value. For example, in the following +sequence +\begin{verbatim} + mov.b #$c0,r0 + mov.w #$c0,r0 + mov.l #$c0,r0 , +\end{verbatim} +the first instruction will result in true immediate addressing, the +second and third instruction will use a word literal: As bit 7 in +the number is set, the byte instruction will effectively create the +value \$FFFFFFC0 in the register. According to the convention, this +wouldn't be the desired value in the second and third example. +However, a word literal is also sufficient for the third case because +the processor will copy a cleared bit 15 of the operand to bits +16..31. + +As one can see, the whole literal stuff is rather complex; I'm sorry but +there was no chance of making things simpler. It is unfortunately a +part of its nature that one sometimes gets error messages about +literals that were not found, which logically should not occur because +AS does the literal processing completely on his own. 
However, if +other errors occur in the second pass, all following labels will move +because AS does not generate any code any more for statements that +have been identified as erroneous. As literal names are partially built +from other symbols' values, other errors might follow because literal +names searched in the second pass differ from the names stored in the +first pass and AS quarrels about undefined symbols...if such errors +should occur, please correct all other errors first before you start +cursing on me and literals... + +People who come out of the Motorola scene and want to use PC-relative +addressing explicitly (e.g. to address variables in a position-independent +way) should know that if this addressing mode is written like in the +programmer's manual: +\begin{verbatim} + mov.l @(Var,PC),r8 +\end{verbatim} +\bb{no} implicit conversion of the address to a displacement will occur, +i.e. the operand is inserted as-is into the machine code (this will +probably generate a value range error...). If you want to use +PC-relative addressing on the SH7x00, simply use ''absolute'' +addressing (which does not exist on machine level): +\begin{verbatim} + mov.l Var,r8 +\end{verbatim} +In this example, the displacement will be calculated correctly (of +course, the same limitations apply for the displacement as it was the +case for literals). + +%%--------------------------------------------------------------------------- + +\section{MELPS-4500} + +The program memory of these microcontrollers is organized in pages of +128 words. Honestly said, this organization only exists because there +are on the one hand branch instructions with a target that must lie +within the same page, and on the other hand ''long'' branches that can +reach the whole address space. The standard syntax defined by +Mitsubishi demands that page number and offset have to be written as +two distinct arguments for the latter instructions. 
As this is +quite inconvenient (except for indirect jumps, a programmer has no +other reason to deal with pages), AS also allows the target +address to be written in a ''linear'' style, for example +\begin{verbatim} + bl $1234 +\end{verbatim} +instead of +\begin{verbatim} + bl $24,$34 . +\end{verbatim} + +%%--------------------------------------------------------------------------- + +\section{6502UNDOC} + +Since the 6502's undocumented instructions naturally aren't listed in +any data book, they shall be listed briefly at this place. Of +course, you are using them at your own risk. There is no guarantee +that all mask revisions will support all variants! In any case, they do +not work for the CMOS successors of the 6502, since these assign +the corresponding bit combinations to ''official'' instructions... + +The following symbols are used: + +\begin{tabbing} +\hspace{2cm} \= \kill +\& \> binary AND \\ +$|$ \> binary OR \\ +\verb!^! \> binary XOR \\ +$<<$ \> logical shift left \\ +$>>$ \> logical shift right \\ +$<<<$ \> rotate left \\ +$>>>$ \> rotate right \\ +$\leftarrow$ \> assignment \\ + (..) \> contents of .. \\ + \{..\} \> bits .. 
\\ + A \> accumulator \\ + X,Y \> index registers X,Y \\ + S \> stack pointer \\ + An \> accumulator bit n \\ + M \> operand \\ + C \> carry \\ + PCH \> upper half of program counter \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{JAM} or \tty{KIL} or \tty{CRS} \\ +Function \> : \> none, processor is halted \\ +Addressing Modes \> : \> implicit \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SLO} \\ +Function \> : \> $M\leftarrow((M)<<1)|(A)$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{ANC} \\ +Function \> : \> $A\leftarrow(A)\&(M), C\leftarrow A7$ \\ +Addressing Modes \> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{RLA} \\ +Function \> : \> $M\leftarrow((M)<<1)\&(A)$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SRE} \\ +Function \> : \> $M\leftarrow((M)>>1)$\verb!^!$(A)$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{ASR} \\ +Function \> : \> $A\leftarrow((A)\&(M))>>1$ \\ +Addressing Modes \> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{RRA} \\ +Function \> : \> $M\leftarrow((M)>>>1)+(A)+(C)$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{ARR} \\ +Function \> : \> $A\leftarrow((A)\&(M))>>>1$ \\ +Addressing Modes 
\> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SAX} \\ +Function \> : \> $M\leftarrow(A)\&(X)$ \\ +Addressing Modes \> : \> absolute long/short, Y-indexed short, \\ + \> \> Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{ANE} \\ +Function \> : \> $M\leftarrow((A)\&\$ee)|((X)\&(M))$ \\ +Addressing Modes \> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SHA} \\ +Function \> : \> $M\leftarrow(A)\&(X)\&(PCH+1)$ \\ +Addressing Modes \> : \> X/Y-indexed long \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SHS} \\ +Function \> : \> $X\leftarrow(A)\&(X), S\leftarrow(X), M\leftarrow(X)\&(PCH+1)$ \\ +Addressing Modes \> : \> Y-indexed long \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SHY} \\ +Function \> : \> $M\leftarrow(Y)\&(PCH+1)$ \\ +Addressing Modes \> : \> Y-indexed long \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SHX} \\ +Function \> : \> $M\leftarrow(X)\&(PCH+1)$ \\ +Addressing Modes \> : \> X-indexed long \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{LAX} \\ +Function \> : \> $A,X\leftarrow(M)$ \\ +Addressing Modes \> : \> absolute long/short, Y-indexed long/short, \\ + \> \> X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{LXA} \\ +Function \> : \> $X\{04\}\leftarrow(X)\{04\}\&(M)\{04\}$, \\ + \> \> $A\{04\}\leftarrow(A)\{04\}\&(M)\{04\}$ \\ +Addressing Modes \> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{LAE} \\ +Function \> : \> $X,S,A\leftarrow((S)\&(M))$ \\ +Addressing Modes \> : \> Y-indexed long \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill 
+Instruction \> : \> \tty{DCP} \\ +Function \> : \> $M\leftarrow(M)-1, Flags\leftarrow((A)-(M))$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{SBX} \\ +Function \> : \> $X\leftarrow((X)\&(A))-(M)$ \\ +Addressing Modes \> : \> immediate \\ +\end{tabbing} + +\begin{tabbing} +Addressing Modes \= : \= \kill +Instruction \> : \> \tty{ISB} \\ +Function \> : \> $M\leftarrow(M)+1, A\leftarrow(A)-(M)-(C)$ \\ +Addressing Modes \> : \> absolute long/short, X-indexed long/short, \\ + \> \> Y-indexed long, X/Y-indirect \\ +\end{tabbing} + +%%--------------------------------------------------------------------------- + +\section{MELPS-740} + +Microcontrollers of this family have a quite nice, however well-hidden +feature: If one sets bit 5 of the status register with the \tty{SET} +instruction, the accumulator will be replaced with the memory cell +addressed by the X register for all load/store and arithmetic +instructions. An attempt to integrate this feature cleanly into the +assembly syntax has not been made so far, so the only way to use it +is currently the ''hard'' way (\tty{SET}...instructions with accumulator +addressing...\tty{CLT}). + +Not all MELPS-740 processors implement all instructions. This is a +place where the programmer has to watch out for himself that no +instructions are used that are unavailable for the targeted +processor; AS does not differentiate among the individual processors +of this family. For a description of the details regarding special +page addressing, see the discussion of the \tty{ASSUME} instruction. + +%%--------------------------------------------------------------------------- + +\section{MELPS-7700/65816} +\label{MELPS7700Spec} + +As it seems, these two processor families took disjunct development +paths, starting from the 6502 via their 8 bit predecessors. 
Shortly +listed, the following differences are present: +\begin{itemize} +\item{The 65816 does not have a B accumulator.} +\item{The 65816 does not have instructions to multiply or divide.} +\item{The 65816 misses the instructions \tty{SEB, CLB, BBC, BBS, CLM, SEM, + PSH, PUL} and \tty{LDM}. Instead, the instructions \tty{TSB, TRB, BIT, CLD, + SED, XBA, XCE} and \tty{STZ} take their places in the opcode table.} +\end{itemize} +The following instructions have identical function, yet different +names: +\par +\begin{center}\begin{tabular}{|c|c||c|c|} +\hline + 65816 & MELPS-7700 & 65816 & MELPS-7700 \\ +\hline +\hline + \tty{REP} & \tty{CLP} & \tty{PHK} & \tty{PHG} \\ + \tty{TCS} & \tty{TAS} & \tty{TSC} & \tty{TSA} \\ + \tty{TCD} & \tty{TAD} & \tty{TDC} & \tty{TDA} \\ + \tty{PHB} & \tty{PHT} & \tty{PLB} & \tty{PLT} \\ + \tty{WAI} & \tty{WIT} & & \\ +\hline +\end{tabular}\end{center} +\par +Especially tricky are the instructions \tty{PHB, PLB} and \tty{TSB}: these +instructions have a totally different encoding and meaning on both +processors! + +Unfortunately, these processors address their memory in a way that is +IMHO even one level higher on the open-ended chart of perversity than +the Intel-like segmentation: They do banking! Well, this seems to +be the price for the 6502 upward-compatibility; before one can use AS +to write code for these processors, one has to inform AS about the +contents of several registers (using the \tty{ASSUME} instruction): + +The M flag rules whether the accumulators A and B should be used with +8 bits (1) or 16 bits (0) width. Analogously, the X flag decides the +width of the X and Y index registers. AS needs this information for +the decision about the argument's width when immediate addressing +(\verb!#!) occurs. + +The memory is organized in 256 banks of 64 KBytes. 
As all registers +in the CPU core have a maximum width of 16 bits, the upper 8 bits +have to be fetched from 2 special bank registers: DT delivers the +upper 8 bits for data accesses, and PG extends the 16-bit program +counter to 24 bits. A 16 bits wide register DPR allows to move the +zero page known from the 6502 to an arbitrary location in the first +bank. If AS encounters an address (it is irrelevant if this address +is part of an absolute, indexed, or indirect expression), the +following addressing modes will be tested: +\begin{enumerate} +\item{Is the address in the range of DPR..DPR+\$ff? If yes, use direct + addressing with an 8-bit address.} +\item{Is the address contained in the page addressable via DT (resp. + PG for branch instructions)? If yes, use absolute addressing + with a 16-bit address.} +\item{If nothing else helps, use long addressing with a 24-bit + address.} +\end{enumerate} +As one can see from this enumeration, the knowledge about the current +values of DT, PG and DPR is essential for a correct operation of AS; +if the specifications are incorrect, the program will probably do +wrong addressing at runtime. This enumeration also implied that all +three address lengths are available; if this is not the case, the +decision chain will become shorter. + +The automatic determination of the address length described above may +be overridden by the usage of prefixes. If one prefixes the address +by a $<$, $>$, or $>>$ without a separating space, an address with 1, 2, or +3 bytes of length will be used, regardless if this is the optimal +length. If one uses an address length that is either not allowed for +the current instruction or too short for the address, an error +message is the result. + +To simplify porting of 6502 programs, AS uses the Motorola syntax for +hexadecimal constants instead of the Intel/IEEE syntax that is +the format preferred by Mitsubishi for their 740xxx series. 
I still +think that this is the better format, and it looks as if the +designers of the 65816 were of the same opinion (as the \tty{RELAXED} +instruction allows the alternative use of Intel notation, this +decision should not hurt anything). Another important detail for the +porting of programs is that it is valid to omit the accumulator A as +target for operations. For example, it is possible to simply write +\verb!LDA #0! instead of \verb!LDA A,#0!. + +A real goodie in the instruction set are the instructions \tty{MVN} resp. +\tty{MVP} to do block transfers. However, their address specification +rules are a bit strange: bits 0--15 are stored in index registers, +bits 16--23 are part of the instruction. When one uses AS, one +simply specifies the full destination and source addresses. AS will +then automatically grab the correct bits. This is a subtle yet +important difference from Mitsubishi's assembler, where you have to +extract the upper 8 bits on your own. Things become really +convenient when a macro like the following is used: +\begin{verbatim} +mvpos macro src,dest,len + if MomCPU=$7700 + lda #len + elseif + lda #(len-1) + endif + ldx #(src&$ffff) + ldy #(dest&$ffff) + mvp dest,src + endm +\end{verbatim} +Caution, possible pitfall: if the accumulator contains the value n, +the Mitsubishi chip will transfer n bytes, but the 65816 will +transfer n+1 bytes! + +The \tty{PSH} and \tty{PUL} instructions are also very handy because they +allow a user-defined set of registers to be saved to the stack resp. to be +restored from the stack. According to the Mitsubishi data book +\cite{Mit16}, the bit mask has to be specified as an immediate operand, so +the programmer either has to keep all bit$\leftrightarrow$register +assignments in mind or he has to define some appropriate symbols. To make +things simpler, I decided to extend the syntax at this point: It is valid +to use a list as argument which may contain an arbitrary sequence of +register names or immediate expressions. 
Therefore, the following +instructions +\begin{verbatim} + psh #$0f + psh a,b,#$0c + psh a,b,x,y + +\end{verbatim} +are equivalent. As immediate expressions are still valid, AS stays +upward compatible to the Mitsubishi assemblers. + +One thing I did not fully understand while studying the Mitsubishi +assembler is the treatment of the \tty{PER} instruction: this instruction +allows to push a 16-bit variable onto the stack whose address is +specified relative to the program counter. Therefore, it is an +absolute addressing mode from the programmer's point of view. +Nevertheless, the Mitsubishi assembler requests immediate addressing, +and the instruction's argument is placed into the code just as-is. +One has to calculate the address on his own, which is something +symbolic assemblers were designed to avoid...as I wanted to stay +compatible, AS contains a compromise: If one chooses immediate +addressing (with a leading \# sign), AS will behave like the original +from Mitsubishi. But if the \# sign is omitted, AS will calculate the +difference between the argument's value and the current program +counter and insert this difference instead. + +A similar situation exists for the \tty{PEI} instruction that pushes the +contents of a 16-bit variable located in the zero page: Though the operand +represents an address, once again immediate addressing is required. In +this case, AS will simply allow both variants (i.e. with or without a \# +sign). + +%%--------------------------------------------------------------------------- + +\section{M16} + +The M16 family is a family of highly complex CISC processors with an +equally complicated instruction set. One of the instruction set's +properties is the detail that in an instruction with two operands, +both operands may be of different sizes. 
The method of appending the +operand size as an attribute of the instruction (known from Motorola +and adopted from Mitsubishi) therefore had to be extended: it is +valid to append attributes to the operands themselves. For example, +the following instruction +\begin{verbatim} + mov r0.b,r6.w +\end{verbatim} +reads the lowest 8 bits of register 0, sign-extends them to 32 bits +and stores the result into register 6. However, as one does not need +this feature in 9 out of 10 cases, it is still valid to append the +operand size to the instruction itself, e.g. +\begin{verbatim} + mov.w r0,r6 +\end{verbatim} +Both variants may be mixed; in such a case, an operand size appended +to an operand overrules the ''default''. An exception are instructions +with two operands. For these instructions, the default for the +source operand is the destination operand's size. For example, in +the following example +\begin{verbatim} + mov.h r0,r6.w +\end{verbatim} +register 0 is accessed with 32 bits, the size specification appended +to the instruction is not used at all. If an instruction does not +contain any size specifications, word size (\tty{w}) will be used. +Remember: in contrast to the 68000 family, this means 32 bits instead +of 16 bits! + +The chained addressing modes are also rather complex; the ability of +AS to automatically assign address components to parts of the chain +keeps things at least halfway manageable. The only way of influencing +AS allows (the original assembler from Mitsubishi/Green Hills allows +a bit more in this respect) is the explicit setting of displacement +lengths by appending \tty{:4, :16} and \tty{:32}. 
+ +%%--------------------------------------------------------------------------- + +\section{4004} + +Another part of history...unfortunately, I wasn't able up to now to get my +hands on official documentation for the world's first microprocessor, and +there are some details lacking: I'm not absolutely sure about the syntax +for register pairs (for 8-bit operations). The current syntax is +\tty{RnRm} with \tty{n} being an even integer in the range from 0 to E and +\tty{m} being an odd integer in the range from 1 to F. The equation {\tt m = n + 1} has to be true always. + +%%--------------------------------------------------------------------------- + +\section{MCS-48} + +The maximum address space of these processors is 4 Kbytes large. This +address space is not organized in a linear way (how could this be on an +Intel CPU...). Instead, it is split into 2 banks of 2 Kbytes. The only +way to change the program counter from one bank to the other are the +instructions \tty{CALL} and \tty{JMP}, by setting the most significant bit +of the address with the instructions \tty{SEL MB0} resp. \tty{SEL MB1}. + +To simplify jumps between these two banks, the instructions \tty{JMP} and +\tty{CALL} contain an automatism that inserts one of these two instructions +if the current program counter and the target address are in +different banks. Explicit usage of these \tty{SEL MBx} instructions should +therefore not be necessary (though it is possible), and it can puzzle +the automatism, like in the following example: +\begin{verbatim} + 000: SEL MB1 + JMP 200h +\end{verbatim} +AS assumes that the MB flag is 0 and therefore does not insert a \tty{SEL +MB0} instruction, with the result that the CPU jumps to address +A00h. + +Furthermore, one should keep in mind that a jump instruction might +become longer (3 instead of 2 bytes). + +%%--------------------------------------------------------------------------- + +\section{MCS-51} + +The assembler is accompanied by the files \tty{STDDEF51.INC} resp. 
+\tty{80C50X.INC} that define all bits and SFRs of the processors 8051, +8052, and 80515 resp. 80C501, 502, and 504. Depending on the target +processor setting (made with the \tty{CPU} statement), the correct subset +will be included. Therefore, the correct order for the instructions +at the beginning of a program is +\begin{verbatim} + CPU + INCLUDE stddef51.inc . +\end{verbatim} +Otherwise, the MCS-51 pseudo instructions will lead to error +messages. + +As the 8051 does not have instructions to push the registers 0..7 +onto the stack, one has to work with absolute addresses. However, +these addresses depend on which register bank is currently active. +To make this situation a little bit better, the include files define +the macro \tty{USING} that accepts the symbols \tty{Bank0...Bank3} as arguments. +In response, the macro will assign the registers' correct absolute +addresses to the symbols \tty{AR0..AR7}. This macro should be used after +every change of the register banks. The macro itself does \bb{not} +generate any code to switch to the bank! + +The macro also keeps track of which banks have been used. +The result is stored in the integer variable \tty{RegUsage}: bit 0 +corresponds to bank 0, bit 1 corresponds to bank 1, and so on. To +output its contents after the source has been assembled, use +something like the following piece of code: +\begin{verbatim} + irp BANK,Bank0,Bank1,Bank2,Bank3 + if (RegUsage&(2^BANK))<>0 + message "bank \{BANK} has been used" + endif + endm +\end{verbatim} +The multipass feature introduced with version 1.38 allowed to introduce +the additional instructions \tty{JMP} and \tty{CALL}. If branches are +coded using these instructions, AS will automatically use the variant that +is optimal for the given target address. The options are \tty{SJMP, +AJMP}, or \tty{LJMP} for \tty{JMP} resp. \tty{ACALL} or \tty{LCALL} for +\tty{CALL}. 
Of course it is still possible to use these variants +directly, in case one wants to force a certain coding. + +%%--------------------------------------------------------------------------- + +\section{MCS-251} + +When designing the 80C251, Intel really tried to make the move to +the new family as smooth as possible for programmers. This +culminated in the fact that old applications can run on the new +processor without having to recompile them. However, as soon as one +wants to use the new features, some details have to be regarded which +may turn into hidden pitfalls. + +The most important thing is the absence of a distinct address space +for bits on the 80C251. All SFRs can now be addressed bitwise, +regardless of their address. Furthermore, the first 128 bytes of the +internal RAM are also bit addressable. This has become possible +because bits are not any more handled by a separate address space +that overlaps other address spaces. Instead, similar to other +processors, bits are addressed with a two-dimensional address that +consists of the memory location containing the bit and the bit's +location in the byte. One result is that in an expression like +\tty{PSW.7}, AS will do the separation of address and bit position itself. +Unlike to the 8051, it is not any more necessary to explicitly +generate 8 bit symbols. This has the other result that the \tty{SFRB} +instruction does not exist any more. If it is used in a program that +shall be ported, it may be replaced with a simple \tty{SFR} instruction. + +Furthermore, Intel cleaned up the cornucopia of different address +spaces on the 8051: the internal RAM (\tty{DATA} resp. \tty{IDATA}), the +\tty{XDATA} space and the former \tty{CODE} space were unified to a single +\tty{CODE} space that is now 16 Mbytes large. The internal RAM starts at +address 0, the internal ROM starts at address ff0000h, which is the +address code has to be relocated to. 
In contrast, the SFRs were moved to +a separate address space (which AS refers to as the \tty{IO} segment). +However, they have the same addresses in this new address space as they +used to have on the 8051. The \tty{SFR} instruction knows of this +difference and automatically assigns symbols to either the \tty{DATA} or +\tty{IO} segment, depending on the target processor. As there is no +\tty{BIT} segment any more, the \tty{BIT} instruction operates completely +differently: Instead of a linear address ranging from 0..255, a bit symbol +now contains the byte's address in bits 0..7, and the bit position in bits +24..26. Unfortunately, creating arrays of flags with a symbolic address +is not that simple any more: On an 8051, one simply wrote: +\begin{verbatim} + segment bitdata + +bit1 db ? +bit2 db ? + +or + +defbit macro name +name bit cnt +cnt set cnt+1 + endm +\end{verbatim} +On a 251, only the second way still works, like this: + \begin{verbatim} +adr set 20h ; start address of flags +bpos set 0 ; in the internal RAM + +defbit macro name +name bit adr.bpos +bpos set bpos+1 + if bpos=8 +bpos set 0 +adr set adr+1 + endif + endm +\end{verbatim} +Another small detail: Intel now prefers \tty{CY} instead of \tty{C} as a +symbolic name for the carry, so you might have to rename an already +existing variable of the same name in your program. However, AS will +continue to understand also the old variant when using the instructions +\tty{CLR, CPL, SETB, MOV, ANL,} or \tty{ORL}. The same is conceptually +true for the additional registers \tty{R8..R15, WR0..WR30, DR0..DR28, DR56, +DR60, DPX,} and \tty{SPX}. + +Intel would like everyone to write absolute addresses in a syntax of +\tty{XX:YYYY}, where \tty{XX} is a 64K bank in the address space resp. +signifies addresses in the I/O space with an \tty{S}. As one might guess, +I am not amused about this, which is why it is legal to alternatively use +linear addresses in all places. 
Only the \tty{S} for I/O addresses is +incircumventable, like in this case: +\begin{verbatim} +Carry bit s:0d0h.7 +\end{verbatim} +Without the prefix, AS would assume an address in the \tty{CODE} segment, +and only the first 128 bits in this space are bit-addressable... + +Like for the 8051, the generic branch instructions \tty{CALL} and +\tty{JMP} exist that automatically choose the shortest machine code +depending on the address layout. However, while \tty{JMP} also may use +the variant with a 24-bit address, \tty{CALL} will not do this for a good +reason: In contrast to \tty{ACALL} and \tty{LCALL}, \tty{ECALL} places an +additional byte onto the stack. A \tty{CALL} instruction would result where +you would not know what it will do. This problem does not exist for the +\tty{JMP} instructions. + +There is one thing I did not understand: The 80251 is also able to +push immediate operands onto the stack, and it may push either single +bytes or complete words. However, the same mnemonic (\tty{PUSH}) is +assigned to both variants - how on earth should an assembler know if +an instruction like +\begin{verbatim} + push #10 +\end{verbatim} +shall push a byte or a word containing the value 10? So the current +rule is that \tty{PUSH} always pushes a byte; if one wants to push a word, +simply use \tty{PUSHW} instead of \tty{PUSH}. + +Another well-meant advise: If you use the extended instruction set, +be sure to operate the processor in source mode; otherwise, all +instructions will become one byte longer! The old 8051 instructions +that will in turn become one byte longer are not a big matter: AS +will either replace them automatically with new, more general +instructions or they deal with obsolete addressing modes (indirect +addressing via 8 bit registers). + +%%--------------------------------------------------------------------------- + +\section{8086..V35} + +Actually, I had sworn myself to keep the segment disease of Intel's +8086 out of the assembler. 
However, as there was a request and as +students are more flexible than the developers of this processor +obviously were, there is now a rudimentary support of these +processors in AS. When saying, 'rudimentary', it does not mean that +the instruction set is not fully covered. It means that the whole +pseudo instruction stuff that is available when using MASM, TASM, or +something equivalent does not exist. To put it in clear words, AS +was not primarily designed to write assembler programs for PC's +(heaven forbid, this really would have meant reinventing the wheel!); +instead, the development of programs for single-board computers was +the main goal (which may also be equipped with an 8086 CPU). + +For die-hards who still want to write DOS programs with AS, here is a +small list of things to keep in mind: +\begin{itemize} +\item{Only \tty{COM} files may be created.} +\item{Only use the \tty{CODE} segment, and place also all variables in + this segment.} +\item{DOS initializes all segment registers to the code segment. + An \tty{ASSUME DS:DATA, SS:DATA} right at the program's beginning + is therefore necessary.} +\item{DOS loads the code to a start address of 100h. An \tty{ORG} to this + address is absolutely necessary.} +\item{The conversion to a binary file is done with P2BIN (see later in + this document), with an address filter of \tty{\$-\$}.} +\end{itemize} +For these processors, AS only supports a small programming model, i.e. +there is \bb{one} code segment with a maximum of 64 Kbytes and a data +segment of equal size for data (which cannot be set to initial values for +\tty{COM} files). The \tty{SEGMENT} instruction allows to switch between +these two segments. From this facts results that branches are always +intrasegment branches if they refer to targets in this single code +segment. In case that far jumps should be necessary, they are possible +via \tty{CALLF} or \tty{JMPF} with a memory address or a +\tty{Segment:Offset} value as argument. 
+ +Another big problem of these processors is their assembler syntax, +which is sometimes ambiguous and whose exact meaning can then only be +deduced by looking at the current context. In the following example, +either absolute or immediate addressing may be meant, depending on +the symbol's type: +\begin{verbatim} + mov ax,value +\end{verbatim} +When using AS, an expression without brackets always is interpreted +as immediate addressing. For example, when either a variable's +address or its contents shall be loaded, the differences listed in table +\ref{TabMASM} are present between MASM and AS: +\begin{table*} +\begin{center}\begin{tabular}{|l|l|l|} +\hline +assembler & address & contents \\ +\hline +\hline +MASM & \tty{mov ax,offset vari} & \tty{mov ax,vari} \\ + & \tty{lea ax,vari} & \tty{mov ax,[vari]} \\ + & \tty{lea ax,[vari]} & \\ + & & \\ +AS & \tty{mov ax,vari} & \tty{mov ax,[vari]} \\ + & \tty{lea ax,[vari]} & \\ +\hline +\end{tabular}\end{center} +\caption{Differences AS$\leftrightarrow$MASM Concerning Addressing + Syntax\label{TabMASM}} +\end{table*} +\par +When addressing via a symbol, the assembler checks whether it is +assigned to the data segment and tries to automatically insert an +appropriate segment prefix. This happens for example when symbols +from the code segment are accessed without specifying a \tty{CS} segment +prefix. However, this mechanism can only work if the \tty{ASSUME} +instruction (see there) has previously been applied correctly. + +The Intel syntax also requires to store whether bytes or words were +stored at a symbol's address. AS will do this only when the \tty{DB} resp. +\tty{DW} instruction is in the same source line as the label. For any +other case, the operand size has to be specified explicitly with the +\tty{BYTE PTR, WORD PTR,...} operators. As long as a register is the other +operand, this may be omitted, as the operand size is then clearly +given by the register's name. 
+ +In an 8086-based system, the coprocessor is usually synchronized via +the processor's TEST input line which is connected to the +coprocessor's BUSY output line. AS supports this type of handshaking +by automatically inserting a \tty{WAIT} instruction prior to every 8087 +instruction. If this is undesired for any reason, an \tty{N} has to be +inserted after the \tty{F} in the mnemonic; for example, +\begin{verbatim} + FINIT + FSTSW [vari] +\end{verbatim} +becomes +\begin{verbatim} + FNINIT + FNSTSW [vari] +\end{verbatim} +This variant is valid for \bb{all} coprocessor instructions. + +%%--------------------------------------------------------------------------- + +\section{8X30x} +\label{8X30xSpec} + +The processors of this family have been optimized for an easy manipulation +of bit groups at peripheral addresses. The instructions \tty{LIV} and +\tty{RIV} were introduced to deal with such objects in a symbolic fashion. +They work similar to \tty{EQU}, however they need three parameters: +\begin{enumerate} +\item{the address of the peripheral memory cell that contains the bit + group (0..255);} +\item{the number of the group's first bit (0..7);} +\item{the length of the group, expressed in bits (1..8).} +\end{enumerate} +\bb{CAUTION!} The 8X30x does not support bit groups that span over more +than one memory address. Therefore, the valid value range for the +length can be more strictly limited, depending on the start position. AS +does \bb{not} perform any checks at this point, you simply get strange +results at runtime! + +Regarding the machine code, length and position are expressed via a 3 +bit field in the instruction word and a proper register number (\tty{LIVx} +resp. \tty{RIVx}). If one uses a symbolic object, AS will automatically +assign correct values to this field, but it is also allowed to +specify the length explicitly as a third operand if one does not work +with symbolic objects. 
If AS finds such a length specification in +spite of a symbolic operand, it will compare both lengths and issue +an error if they do not match (the same will happen for the MOVE +instruction if two symbolic operands with different lengths are used +- the instruction simply only has a single length field...). + +Apart from the real machine instructions, AS defines similarly to its +''idol'' MCCAP some pseudo instructions that are implemented as builtin +macros: +\begin{itemize} +\item{\tty{NOP} is a shortform for \tty{MOVE AUX,AUX}} +\item{\tty{HALT} is a shortform for \tty{JMP \verb!*!}} +\item{\tty{XML ii} is a shortform for \tty{XMIT ii,R12} (only 8X305)} +\item{\tty{XMR ii} is a shortform for \tty{XMIT ii,R13} (only 8X305)} +\item{\tty{SEL $<$busobj$>$} is a shortform for \tty{XMIT $<$adr$>$,IVL/IVR}, + i.e. it performs the necessary preselection to access $<$busobj$>$.} +\end{itemize} +The \tty{CALL} and \tty{RTN} instructions MCCAP also implements are +currently missing due to insufficient documentation. The same is true for a +set of pseudo instructions to store constants to memory. Time may change +this... + +%%--------------------------------------------------------------------------- + +\section{XA} + +Similar to its predecessor MCS/51, but in contrast to its +'competitor' MCS/251, the Philips XA has a separate address space for +bits, i.e. all bits that are accessible via bit instructions have a +certain, one-dimensional address which is stored as-is in the machine +code. However, I could not take the obvious opportunity to offer +this third address space (code and data are the other two) as a +separate segment. The reason is that - in contrast to the MCS/51 - +some bit addresses are ambiguous: bits with an address from 256 to 511 +refer to the bits of memory cells 20h..3fh in the current data +segment. This means that these addresses may correspond to different +physical bits, depending on the current state. 
Defining bits with +the help of \tty{DC} instructions - something that would be possible with a +separate segment - would not make too much sense. However, the \tty{BIT} +instruction still exists to define individual bits (regardless if +they are located in a register, the RAM or SFR space) that can then +be referenced symbolically. If the bit is located in RAM, the +address of the 64K-bank is also stored. This way, AS can check +whether the DS register has previously been assigned a correct value +with an \tty{ASSUME} instruction. + +In contrast, nothing can stop AS's efforts to align potential branch +targets to even addresses. Like other XA assemblers, AS does this by +inserting \tty{NOP}s right before the instruction in question. + +%%--------------------------------------------------------------------------- + +\section{AVR} + +In contrast to the AVR assembler, AS by default uses the Intel format +to write hexadecimal constants instead of the C syntax. All right, I +did not look into the (free) AVR assembler before, but when I started +with the AVR part, there was hardly more information about the AVR +than a preliminary manual describing processor types that were never +sold...this problem can be solved with a simple RELAXED ON. + +Optionally, AS can generate so-called "object files" for the AVRs (it +also works for other CPUs, but it does not make any sense for them...). +These are files containing code and source line info what e.g. allows +a step-by-step execution on source level with the WAVRSIM simulator +delivered by Atmel. Unfortunately, the simulator seems to have +trouble with source file names longer than approx. 20 characters: +Names are truncated and/or extended by strange special characters +when the maximum length is exceeded. AS therefore stores file name +specifications in object files without a path specification. +Therefore, problems may arise when files like includes are not in the +current directory. 
+ +A small specialty are machine instructions that have already been defined +by Atmel as part of the architecture, but up to now haven't been +implemented in any of the family's members. The instructions in question +are {\tt MUL, JMP,} and {\tt CALL}. Considering the latter ones, one may +ask himself how to reach the 4 Kwords large address space of the AT90S8515 +when the 'next best' instructions {\tt RJMP} and {\tt RCALL} can only +branch up to 2 Kwords forward or backward. The trick is named 'discarding +the upper address bits' and described in detail with the {\tt WRAPMODE} +statement. + +%%--------------------------------------------------------------------------- + +\section{Z80UNDOC} + +As one might guess, Zilog did not make any syntax definitions for the +undocumented instructions; furthermore, not everyone might know the +full set. It might therefore make sense to list all instructions at +this place: + +Similar to a Z380, it is possible to access the byte halves of IX and +IY separately. In detail, these are the instructions that allow +this: +\begin{verbatim} + INC Rx LD R,Rx LD Rx,n + DEC Rx LD Rx,R LD Rx,Ry + ADD/ADC/SUB/SBC/AND/XOR/OR/CP A,Rx +\end{verbatim} +\tty{Rx} and \tty{Ry} are synonyms for \tty{IXL, IXU, IYL} or \tty{IYU}. +Keep however in mind that in the case of \tty{LD Rx,Ry}, both registers +must be part of the same index register. + +The coding of shift instructions leaves an undefined bit combination which +is now accessible as the \tty{SLIA} instruction. \tty{SLIA} works like +\tty{SLA} with the difference of entering a 1 into bit position 0. Like +all other shift instructions, \tty{SLIA} also allows another undocumented +variant: +\begin{verbatim} + SLIA R,(XY+d) +\end{verbatim} +In this case, \tty{R} is an arbitrary 8-bit register (excluding index +register halves...), and \tty{(XY+d)} is a normal indexed address. This +operation has the additional effect of copying the result into the +register. 
This also works for the \tty{RES} and \tty{SET} instructions: +\begin{verbatim} + SET/RES R,n,(XY+d) +\end{verbatim} +Furthermore, two hidden I/O instructions exist: +\begin{verbatim} + IN (C) resp. TSTI + OUT (C),0 +\end{verbatim} +Their operation should be clear. \bb{CAUTION!} No one can +guarantee that all mask revisions of the Z80 execute these +instructions, and the Z80's successors will react with traps if they +find one of these instructions. Use them at your own risk... + +%%--------------------------------------------------------------------------- + +\section{Z380} + +As this processor was designed as a grandchild of the still most popular +8-bit microprocessor, it was a sine-qua-non design target to execute +existing Z80 programs without modification (of course, they execute a bit +faster, roughly by a factor of 10...). Therefore, all extended features +can be enabled after a reset by setting two bits which are named XM +(eXtended Mode, i.e. a 32-bit instead of a 16-bit address space) +respectively LW (long word mode, i.e. 32-bit instead of 16-bit operands). +One has to inform AS about their current setting with the instructions +\tty{EXTMODE} resp. \tty{LWORDMODE}, to enable AS to check addresses and +constants against the correct upper limits. The toggle between 32- and +16-bit instruction of course only influences instructions that are +available in a 32-bit variant. Unfortunately, the Z380 currently offers +such variants only for load and store instructions; arithmetic can only be +done in 16 bits. Zilog really should do something about this, otherwise +the most positive description for the Z380 would be ''16-bit processor +with 32-bit extensions''... + +The whole thing becomes complicated by the ability to override the operand +size set by LW with the instruction prefixes \tty{DDIR W} resp. +\tty{DDIR LW}. AS will note the occurrence of such instructions and will +toggle the setting for the instruction following directly. 
By the way, one +should never explicitly use other \tty{DDIR} variants than \tty{W} resp. +\tty{LW}, as AS will introduce them automatically when an operand is +discovered that is too long. Explicit usage might puzzle AS. The +automatism is so powerful that in a case like this: +\begin{verbatim} + DDIR LW + LD BC,12345678h , +\end{verbatim} +the necessary \tty{IW} prefix will automatically be merged into the previous +instruction, resulting in +\begin{verbatim} + DDIR LW,IW + LD BC,12345678h . +\end{verbatim} +The machine code that was first created for \tty{DDIR LW} is retracted and +replaced, which is signified with an \tty{R} in the listing. + +%%--------------------------------------------------------------------------- + +\section{TLCS-900(L)} +\label{TLCS900Spec} + +These processors may run in two operating modes: on the one hand, in +minimum mode, which offers almost complete source code compatibility +to the Z80 and TLCS-90, and on the other hand in maximum mode, which +is necessary to make full use of the processor's capabilities. The +main differences between these two modes are: +\begin{itemize} +\item{width of the registers WA, BC, DE, and HL: 16 or 32 bits;} +\item{number of register banks: 8 or 4;} +\item{code address space: 64 Kbytes or 16 Mbytes;} +\item{length of return addresses: 16 or 32 bits.} +\end{itemize} +To allow AS to check against the correct limits, one has to inform it +about the current execution mode via the \tty{MAXMODE} instruction (see +there). The default is the minimum mode. + +From this follows that, depending on the operating mode, the 16-bit +resp. 32-bit versions of the bank registers have to be used for +addressing, i.e. WA, BC, DE and HL for the minimum mode resp. XWA, +XBC, XDE and XHL for the maximum mode. 
The registers XIX..XIZ and +XSP are \bb{always} 32 bits wide and therefore always have to be used +in this form for addressing; in this detail, existing Z80 code +definitely has to be adapted (not including that there is no I/O +space and all I/O registers are memory-mapped...). + +The syntax chosen by Toshiba is a bit unfortunate in the respect of +choosing a single quote (') to reference the previous register bank. The +processor independent parts of AS already use this character to mark +character constants. In an instruction like +\begin{verbatim} + ld wa',wa , +\end{verbatim} +AS will not recognize the comma for parameter separation. This +problem can be circumvented by usage of an inverse single quote (`), for +example +\begin{verbatim} + ld wa`,wa +\end{verbatim} +Toshiba delivers an own assembler for the TLCS-900 series (TAS900), +which is different from AS in the following points: + +\subsubsection{Symbol Conventions} + +\begin{itemize} +\item{TAS900 differentiates symbol names only on the first 32 + characters. In contrast, AS always stores symbol names with the + full length (up to 255 characters) and uses them all for + differentiation.} +\item{TAS900 allows to write integer constants either in Intel or C + notation (with a 0 prefix for octal or a 0x prefix for hexadecimal + constants). By default, AS only supports the Intel notation. + With the help of the \tty{RELAXED} instruction, one also gets the C + notation (among others).} +\item{AS does not distinguish between upper and lower case. In + contrast, TAS900 differentiates between upper- and lowercase + letters in symbol names. One needs to engage the \tty{-u} command + line option to force AS to do this.} +\end{itemize} + +\subsubsection{Syntax} + +For many instructions, the syntax checking of AS is less strict than +the checking of TAS900. In some (rare) cases, the syntax is slightly +different. 
These extensions and changes are on the one hand for the +sake of a better portability of existing Z80 codes, on the other hand +they provide a simplification and better orthogonality of the +assembly syntax: +\begin{itemize} +\item{In the case of \tty{LDA, JP}, and \tty{CALL}, TAS requires that address + expressions like \tty{XIX+5} must not be placed in parentheses, as it + is usually the case. For the sake of better orthogonality, AS + requires parentheses for \tty{LDA}. They are optional if \tty{JP} resp. + \tty{CALL} are used with a simple, absolute address.} +\item{In the case of \tty{JP, CALL, JR}, and \tty{SCC}, AS leaves the choice to the + programmer whether to explicitly write out the default condition + \tty{T} (= true) as first parameter or not. TAS900 in contrast only + allows to use the default condition implicitly (e.g. \tty{jp (xix+5)} + instead of \tty{jp t,(xix+5)}).} +\item{For the \tty{EX} instruction, AS allows operand combinations which are + not listed in \cite{Tosh900} but can be reduced to a standard + combination by swapping the operands. Combinations like \tty{EX f`,f} + or \tty{EX wa,(xhl)} become possible. In contrast, TAS900 limits to + the 'pure' combinations.} +\item{AS allows to omit an increment resp. decrement of 1 when using the + instructions \tty{INC} and \tty{DEC}. TAS900 instead forces the programmer to + explicit usage of '1'.} +\item{The same is true for the shift instructions: If the operand is + a register, TAS900 requires that even a shift count of 1 has to + be written explicitly; however, when the operand is in memory, + the hardware limits the shift count to 1 which must not be written + in this case. With AS, a shift count of 1 is always optional and + valid for all types of operands.} +\end{itemize} + +\subsubsection{Macro Processor} + +The macro processor of TAS900 is an external program that operates +like a preprocessor. 
It consists of two components: The first one is +a C-like preprocessor, and the second one is a special macro language +(MPL) that reminds of high level languages. The macro processor of +AS instead is oriented towards ''classic'' macro assemblers like MASM +or M80 (both programs from Microsoft). It is a fixed component of +AS. + +\subsubsection{Output Format} + +TAS900 generates relocatable code that allows to link separately +compiled programs to a single application. AS instead generates +absolute machine code that is not linkable. There are currently no +plans to extend AS in this respect. + +\subsubsection{Pseudo Instructions} + +Due to the missing linker, AS lacks a couple of pseudo instructions +needed for relocatable code TAS900 implements. The following +instructions are available with equal meaning: +\begin{quote}\tt + EQU, DB, DW, ORG, ALIGN, END, TITLE, SAVE, RESTORE +\rm\end{quote} +The latter two have an extended functionality for AS. Some TAS900 +pseudo instructions can be replaced with equivalent AS instructions (see +table \ref{TabTAS900}). +\par +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|l|l|} +\hline +TAS900 & AS & meaning/function \\ +\hline +\hline +\tty{DL} $<$Data$>$ & \tty{DD} $<$Data$>$ & define longword constants \\ +\hline +\tty{DSB} $<$number$>$ & \tty{DB} $<$number$>$ \tty{DUP} (?) & reserve bytes of memory \\ +\hline +\tty{DSW} $<$number$>$ & \tty{DW} $<$number$>$ \tty{DUP} (?) & reserve words of memory \\ +\hline +\tty{DSD} $<$number$>$ & \tty{DD} $<$number$>$ \tty{DUP} (?) 
& reserve longwords of memory \\ +\hline +\tty{\$MIN[IMUM]} & \tty{MAXMODE OFF} & following code runs \\ + & & in minimum mode \\ +\hline +\tty{\$MAX[IMUM]} & \tty{MAXMODE ON} & following code runs \\ + & & in maximum mode \\ +\hline +\tty{\$SYS[TEM]} & \tty{SUPMODE ON} & following code runs \\ + & & in system mode \\ +\hline +\tty{\$NOR[MAL]} & \tty{SUPMODE OFF} & following code runs \\ + & & in user mode \\ +\hline +\tty{\$NOLIST} & \tty{LISTING OFF} & turn off assembly listing \\ +\hline +\tty{\$LIST} & \tty{LISTING ON} & turn on assembly listing \\ +\hline +\tty{\$EJECT} & \tty{NEWPAGE} & start new page in listing \\ +\hline +\end{tabular}\end{center} +\caption{equivalent instructions TAS900$\leftrightarrow$AS\label{TabTAS900}} +\end{table*} +Toshiba manufactures two versions of the processor core, with the L +version being an ''economy version''. AS will make the following +differences between TLCS-900 and TLCS-900L: +\begin{itemize} +\item{The instructions \tty{MAX} and \tty{NORMAL} are not allowed for the L version; + the \tty{MIN} instruction is disabled for the full version.} +\item{The L version does not know the normal stack pointer XNSP/NSP, but + instead has the interrupt nesting register INTNEST.} +\end{itemize} +The instructions \tty{SUPMODE} and \tty{MAXMODE} are not influenced, just as +their initial setting \tty{OFF}. The programmer has to take care of the +fact that the L version starts in maximum mode and does not have a +normal mode. However, AS shows a bit of mercy against the L variant +by suppressing warnings for privileged instructions. + +%%--------------------------------------------------------------------------- + +\section{TLCS-90} + +Maybe some people might ask themselves if I mixed up the order a +little bit, as Toshiba first released the TLCS-90 as an extended Z80 +and afterwards the 16-bit version TLCS-900. Well, I discovered the +'90 via the '900 (thank you Oliver!). 
The two families are quite +similar, not only regarding their syntax but also in their +architecture. The hints for the '90 are therefore a subset of the +chapter for the '900: As the '90 only allows shifts, increments, and +decrements by one, the count need not and must not be written as the +first argument. Once again, Toshiba wants to omit parentheses for +memory operands of \tty{LDA, JP, and CALL}, and once again AS requires them +for the sake of orthogonality (the exact reason is of course that +this way, I saved an extra in the address parser, but one does not +say such a thing aloud). + +Principally, the TLCS-90 series already has an address space of 1 +Mbyte which is however only accessible as data space via the index +registers. AS therefore does not regard the bank registers and +limits the address space to 64 Kbytes. This should not limit too +much as this area above is anyway only reachable via indirect +addressing. + +%%--------------------------------------------------------------------------- + +\section{TLCS-870} + +Once again Toshiba...a company quite productive at the moment! +Especially this branch of the family (all Toshiba microcontrollers +are quite similar in their binary coding and programming model) seems +to be targeted towards the 8051 market: the method of separating the +bit position from the address expression with a dot had its root in +the 8051. However, it creates now exactly the sort of problems I +anticipated when working on the 8051 part: On the one hand, the dot +is a legal part of symbol names, but on the other hand, it is part of +the address syntax. This means that AS has to separate address and +bit position and must process them independently. Currently, I +solved this conflict by seeking the dot starting at the \bb{end} of the +expression. This way, the last dot is regarded as the separator, and +further dots stay parts of the address. 
I continue to urge everyone +to omit dots in symbol names, they will lead to ambiguities: +\begin{verbatim} + LD CF,A.7 ; accumulator bit 7 to carry + LD C,A.7 ; constant 'A.7' to accumulator +\end{verbatim} + +%%--------------------------------------------------------------------------- + +\section{TLCS-47} + +This family of 4-bit microcontrollers should mark the low end of what +is supportable by AS. Apart from the \tty{ASSUME} instruction for the data +bank register (see there), there is only one thing that is worth +mentioning: In the data and I/O segment, nibbles are reserved instead +of bytes (it's a 4-bitter...). The situation is similar to the bit +data segment of the 8051, where a \tty{DB} reserves a single bit, with the +difference that we are dealing with nibbles. + +Toshiba defined an ''extended instruction set'' for this processor +family to facilitate the work with their limited instruction set. In +the case of AS, it is defined in the include file \tty{STDDEF47.INC}. +However, some instructions that could not be realized as macros are +''builtins'' and are therefore also available without the include file: +\begin{itemize} +\item{the \tty{B} instruction that automatically chooses the optimal version + of the jump instruction (\tty{BSS; BS}, or \tty{BSL});} +\item{\tty{LD} in the variant of \tty{HL} with an immediate operand;} +\item{\tty{ROLC} and \tty{RORC} with a shift amplitude higher than one.} +\end{itemize} + +%%--------------------------------------------------------------------------- + +\section{TLCS-9000} + +This is the first time that I implemented a processor for AS which +was not available at that point of time. Unfortunately, Toshiba +decided to put this processor ''on ice'', so we won't see any silicon +in the near future. This has of course the result that this part +\begin{enumerate} +\item{is a ''paper design'', i.e. 
there was so far no chance to test + it in the reality and} +\item{the documentation for the '9000 I could get hold of \cite{Tosh9000} + was preliminary, so it could not deliver clarity on every + detail.} +\end{enumerate} +Therefore, errors in this code generator are quite possible (and will +of course be fixed if it should ever become possible!). At least the +few examples listed in \cite{Tosh9000} are assembled correctly. + +%%--------------------------------------------------------------------------- + +\section{29xxx} + +As it was already described in the discussion of the \tty{ASSUME} +instruction, AS can use the information about the current setting of +the RBP register to detect accesses to privileged registers in user +mode. This ability is of course limited to direct accesses (i.e. +without using the registers IPA...IPC), and there is one more +pitfall: as local registers (registers with a number $>$127) are +addressed relative to the stack pointer, but the bits in RBP always +refer to absolute numbers, the check is NOT done for local registers. +An extension would require AS to know always the absolute value of +SP, which would at least fail for recursive subroutines... + +%%--------------------------------------------------------------------------- + +\section{80C16x} + +As it was already explained in the discussion of the \tty{ASSUME} +instruction, AS tries to hide the fact that the processor has more +physical than logical RAM as far as possible. Please keep in mind +that the DPP registers are valid only for data accesses and only +have an influence on absolute addressing, neither on indirect nor on indexed +addresses. AS cannot know which value the computed address may take +at runtime... +The paging unit unfortunately does not operate for code accesses so +one has to work with explicit long or short \tty{CALL}s, \tty{JMP}s, or +\tty{RET}s. 
At least for the ''universal'' instructions \tty{CALL} and +\tty{JMP}, AS will automatically use the shortest variant, but at least for the RET one +should know where the call came from. \tty{JMPS} and \tty{CALLS} principally +require to write segment and address separately, but AS is written in +a way that it can split an address on its own, e.g. one can write +\begin{verbatim} + jmps 12345h +\end{verbatim} +instead of +\begin{verbatim} + jmps 1,2345h +\end{verbatim} +Unfortunately, not all details of the chip's internal instruction +pipeline are hidden: if CP (register bank address), SP (stack), or +one of the paging registers are modified, their value is not +available for the instruction immediately following. AS tries to +detect such situations and will issue a warning in such cases. Once +again, this mechanism only works for direct accesses. + +Bits defined with the \tty{BIT} instruction are internally stored as a +12-bit word, containing the address in bits 4..11 and the bit +position in the four LSBs. This order allows to refer the next resp. +previous bit by incrementing or decrementing the address. This will +however not work for explicit bit specifications when a word boundary +is crossed. For example, the following expression will result in a +range check error: +\begin{verbatim} + bclr r5.15+1 +\end{verbatim} +We need a \tty{BIT} in this situation: +\begin{verbatim} +msb bit r5.15 + . + . + bclr msb+1 +\end{verbatim} +The SFR area was doubled for the 80C167/165/163: bit 12 flags that a bit +lies in the second part. Siemens unfortunately did not foresee that +256 SFRs (128 of them bit addressable) would not suffice for +successors of the 80C166. 
As a result, it would be impossible to +reach the second SFR area from F000H..F1DFH with short addresses or +bit instructions if the developers had not included a toggle +instruction: +\begin{verbatim} + EXTR #n +\end{verbatim} +This instruction has the effect that for the next \tty{n} instructions +($0<n<5$), it is possible to address the alternate SFR space instead of +the normal one. + +A similar mechanism exists for data addresses with the prefix +instructions \tty{EXTP} (selection of a 16K page) and \tty{EXTS} +(selection of a 64K segment). AS issues a warning when an absolute +address lies outside the range that is currently selected, as the +following example shows: +\begin{verbatim} + extp #7,#1 ; range from 112K..128K + mov r0,1cdefh ; results in address 0defh in code + mov r0,1cdefh ; -->warning + exts #1,#1 ; range from 64K..128K + mov r0,1cdefh ; results in address 0cdefh in code + mov r0,1cdefh ; -->warning +\end{verbatim} + +%%--------------------------------------------------------------------------- + +\section{PIC16C5x/16C8x} + +Similar to the MCS-48 family, the PICs split their program memory +into several banks because the opcode does not offer enough space for +a complete address. AS uses the same automatism for the instructions +\tty{CALL} and \tty{GOTO}, i.e. the PA bits in the status word are set according +to the start and target address. However, this procedure is far more +problematic compared to the 48's: +\begin{enumerate} +\item{The instructions are not any more one word long (up to three + words). Therefore, it is not guaranteed that they can be + skipped with a conditional branch.} +\item{It is possible that the program counter crosses a page boundary + while the program sequence is executed. The setting of PA bits + AS assumes may be different from reality.} +\end{enumerate} +The instructions that operate on register W and another register +normally require a second parameter that specifies whether the result +shall be stored in W or the register. Under AS, it is valid to omit +the second parameter. The assumed target then depends upon the +operation's type: For unary operations, the result is by default +stored back into the register. 
These instructions are: +\begin{quote}{\tt + COMF, DECF, DECFSZ, INCF, INCFSZ, RLF, RRF, and SWAPF +}\end{quote} +The other operations by default regard W as an accumulator: +\begin{quote}{\tt + ADDWF, ANDWF, IORWF, MOVF, SUBWF, and XORWF +}\end{quote} +The syntax defined by Microchip to write literals is quite obscure +and is reminiscent of the syntax used on IBM 360/370 systems (greetings from +the stone-age...). To avoid introducing another branch into the +parser, with AS one has to write constants in the Motorola syntax +(optionally Intel or C in \tty{RELAXED} mode). + +%%--------------------------------------------------------------------------- + +\section{PIC 17C4x} + +With two exceptions, the same hints are valid as for its two smaller +brothers: the corresponding include file only contains register +definitions, and the problems concerning jump instructions are much +smaller. The only exception is the \tty{LCALL} instruction, which allows a +jump with a 16-bit address. It is translated with the following +''macro'': +\begin{verbatim} + MOVLW <addr15..8> + MOVWF 3 + LCALL <addr0..7> +\end{verbatim} + +%%--------------------------------------------------------------------------- + +\section{ST6} + +These processors have the ability to map their code ROM pagewise into the +data area. I am not keen on repeating the whole discussion of the +\tty{ASSUME} instruction at this place, so I refer to the corresponding +section (\ref{ST6Assume}) for an explanation how to read constants out of +the code ROM without too much headache. + +Some builtin ''macros'' show up when one analyzes the instruction set a +bit more in detail. 
The instructions I found are listed in table +\ref{TabHid62} (there are probably even more...): +\par +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|l|} +\hline +instruction & in reality \\ +\hline +\hline +\tty{CLR A} & \tty{SUB A,A} \\ +\tty{SLA A} & \tty{ADD A,A} \\ +\tty{CLR addr} & \tty{LDI addr,0} \\ +\tty{NOP} & \tty{JRZ PC+1} \\ +\hline +\end{tabular}\end{center} +\caption{Hidden Macros in the ST62's Instruction Set\label{TabHid62}} +\end{table*} +Especially the last case is a bit astonishing...unfortunately, some +instructions are really missing. For example, there is an \tty{AND} +instruction but no \tty{OR}...not to speak of an \tty{XOR}. For this reason, the +include file \tty{STDDEF62.INC} contains also some helping macros +(additionally to register definitions). + +The original assembler AST6 delivered by SGS-Thomson partially uses +different pseudo instructions than AS. Apart from the fact that AS +does not mark pseudo instructions with a leading dot, the following +instructions are identical: +\begin{verbatim} + ASCII, ASCIZ, BLOCK, BYTE, END, ENDM, EQU, ERROR, MACRO, + ORG, TITLE, WARNING +\end{verbatim} +Table \ref{TabAST6} shows the instructions which have AS counterparts +with similar function. 
+\par +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|l|l|l|} +\hline +AST6 & AS & meaning/function \\ +\hline +\hline +\tty{.DISPLAY} & \tty{MESSAGE} & output message \\ +\hline +\tty{.EJECT} & \tty{NEWPAGE} & new page in assembly listing \\ +\hline +\tty{.ELSE} & \tty{ELSEIF} & conditional assembly \\ +\hline +\tty{.ENDC} & \tty{ENDIF} & conditional assembly \\ +\hline +\tty{.IFC} & \tty{IF...} & conditional assembly \\ +\hline +\tty{.INPUT} & \tty{INCLUDE} & insert include file \\ +\hline +\tty{.LIST} & \tty{LISTING, MACEXP} & settings for listing \\ +\hline +\tty{.PL} & \tty{PAGE} & page length of listing \\ +\hline +\tty{.ROMSIZE} & \tty{CPU} & set target processor \\ +\hline +\tty{.VERS} & \tty{VERSION} (symbol) & query version \\ +\hline +\tty{.SET} & \tty{EVAL} & redefine variables \\ +\hline +\end{tabular}\end{center} +\caption{Equivalent Instructions AST6$\leftrightarrow$AS\label{TabAST6}} +\end{table*} + +%%--------------------------------------------------------------------------- + +\section{ST7} + +In \cite{ST7Man}, the \tty{.w} postfix to signify 16-bit addresses is only +defined for memory indirect operands. It is used to mark that a +16-bit address is stored at a zero page address. AS additionally +allows this postfix for absolute addresses or displacements of +indirect address expressions to force 16-bit displacements in spite +of an 8-bit value (0..255). + +%%--------------------------------------------------------------------------- + +\section{ST9} + +The ST9's bit addressing capabilities are quite limited: except for +the \tty{BTSET} instruction, only bits within the current set of working +registers are accessible. A bit address is therefore of the +following style: +\begin{verbatim} + rn.[!]b , +\end{verbatim} +whereby \tty{!} means an optional complement of a source operand. 
If a bit +is defined symbolically, the bit's register number is stored in bits +7..4, the bit's position is stored in bits 3..1 and the optional +complement is kept in bit 0. AS distinguishes explicit and symbolic +bit addresses by the missing dot. A bit's symbolic name therefore +must not contain a dot, though it would be legal in respect to the +general symbol name conventions. It is also valid to invert a +symbolically referred bit: +\begin{verbatim} +bit2 bit r5.3 + . + . + bld r0.0,!bit2 +\end{verbatim} +This opportunity also allows to undo an inversion that was done at +definition of the symbol. + +The include file \tty{REGST9.INC} defines the symbolic names of all on-chip +registers and their associated bits. Keep however in mind that the +bit definitions only work after previously setting the working +register bank to the address of these peripheral registers! + +In contrast to the definition file delivered with the AST9 assembler +from SGS-Thomson, the names of peripheral registers are only +defined as general registers (\tty{R...}), not also as working registers +(\tty{r...}). The reason for this is that AS does not support register +aliases; a tribute to assembly speed. + +%%--------------------------------------------------------------------------- + +\section{6804} + +To be honest: I only implemented this processor in AS to quarrel +about SGS-Thomson's peculiar behaviour. When I first read the 6804's +data book, the ''incomplete'' instruction set and the built-in macros +immediately reminded me of the ST62 series manufactured by the same +company. A more thorough comparison of the opcodes gave surprising +insights: A 6804 opcode can be generated by taking the equivalent +ST62 opcode and mirroring all the bits!
So Thomson obviously did a +bit of processor core recycling...which would be all right if they +would not try to hide this: different peripherals, Motorola instead +of Zilog-style syntax, and the awful detail of \bb{not} mirroring operand +fields in the opcode (e.g. bit fields containing displacements). The +last item is also the reason that finally convinced me to support the +6804 in AS. I personally can only guess which department at Thomson +did the copy... + +In contrast to its ST62 counterpart, the include file for the 6804 +does not contain instruction macros that help a bit to deal with the +limited machine instruction set. This is left as an exercise to the +reader! + +%%--------------------------------------------------------------------------- + +\section{TMS3201x} + +It seems that every semiconductor manufacturer's ambition is to invent an own +notation for hexadecimal numbers. Texas Instruments took an +especially eccentric approach for these processors: a $>$ sign as +prefix! The support of such a format in AS would have led to +extreme conflicts with AS's compare and shift operators. I therefore +decided to use the Intel notation, which is what TI also uses for the +340x0 series and the 3201x's successors... + +The instruction word of these processors unfortunately does not have +enough bits to store all 8 bits for direct addressing. This is why +the data address space is split into two banks of 128 words. AS +principally regards the data address space as a linear segment of 256 +words and automatically clears bit 7 on direct accesses (an exception +is the \tty{SST} instruction that can only write to the upper bank). The +programmer has to take care that the bank flag always has the correct +value! + +Another hint that is well hidden in the data book: The \tty{SUBC} +instruction internally needs more than one clock for completion, but +the control unit already continues to execute the next instruction.
+An instruction following \tty{SUBC} therefore may not access the +accumulator. AS does not check for such conditions! + +%%--------------------------------------------------------------------------- + +\section{TMS320C2x} + +As I did not write this code generator myself (that does not lower +its quality by any standard), I can only roughly line out why there +are some instructions that force a prefixed label to be untyped, i.e. +not assigned to any specific address space: The 2x series of TMS +signal processors has a code and a data segment which are both 64 +Kbytes large. Depending on external circuitry, code and data space may +overlap, e.g. to allow storage of constants in the code area and +access them as data. Data storage in the code segment may be +necessary because older versions of AS assume that the data segment +only consists of RAM that cannot have a defined power-on state in a +single board system. They therefore reject storage of contents in +other segments than \tty{CODE}. Without the feature of making symbols +untyped, AS would punish every access to a constant in code space +with a warning (''symbol out of wrong segment''). To say it in detail, +the following instructions make labels untyped: +\begin{quote}\tt + BSS, STRING, RSTRING, BYTE, WORD , LONG\\ + FLOAT, DOUBLE, EFLOAT, BFLOAT and TFLOAT +\rm\end{quote} +If one needs a typed label in front of one of these instructions, one +can work around this by placing the label in a separate line just +before the pseudo instruction itself. On the other hand, it is +possible to place an untyped label in front of another pseudo +instruction by defining the label with \tty{EQU}, e.g. +\begin{verbatim} + EQU $ . 
+\end{verbatim} + +%%--------------------------------------------------------------------------- + +\section{TMS320C3x} + +The syntax detail that created the biggest amount of headache for me +while implementing this processor family is the splitting of parallel +instructions into two separate source code lines. Fortunately, both +instructions of such a construct are also valid single instructions. +AS therefore first generates the code for the first instruction and +replaces it by the parallel machine code when a parallel construct is +encountered in the second line. This operation can be noticed in the +assembly listing by the machine code address that does not advance +and the double dot replaced with a \tty{R}. + +Compared to the TI assembler, AS is not as flexible regarding the +position of the double lines that signify a parallel operation +(\tty{||}): One either has to place them like a label (starting in the +first column) or to prepend them to the second mnemonic. The line +parser of AS will run into trouble if you do something else... + +%%--------------------------------------------------------------------------- + +\section{TMS9900} + +Similar to most older TI microprocessor families, TI used an own +format for hexadecimal and binary constants. AS instead favours the +Intel syntax which is also common for newer processor designs from +TI. + +The TI syntax for registers allows to use a simple integer number +between 0 and 15 instead of a real name (\tty{Rx} or \tty{WRx}). +This has two consequences: +\begin{itemize} +\item{\tty{R0...R15} resp. \tty{WR0..WR15} are simple predefined integer + symbols with values from 0 to 15, and the definition of register + aliases is a simple matter of \tty{EQU}.} +\item{In contrast to several other processors, I cannot offer the + additional AS feature that allows to omit the character signifying + absolute addressing (a \tty{@} sign in this case).
As a missing + character would mean register numbers (from 0 to 15) in this case, + it was not possible to offer the optional omission.} +\end{itemize} +Furthermore, TI sometimes uses \tty{Rx} to name registers and \tty{WRx} +at other places...currently both variants are recognized by AS. + +%%--------------------------------------------------------------------------- + +\section{TMS70Cxx} + +This processor family belongs to the older families developed by TI +and therefore TI's assemblers use their proprietary syntax for +hexadecimal resp. binary constants (a prefixed $<$ resp. \tty{?} character). +As this format could not be realized for AS, the Intel syntax is used +by default. This is the format to which TI also switched over when +introducing the successors of this family, the 370 series of +microcontrollers. Upon a closer inspection of both families' machine +instruction sets, one discovers that about 80\% of all instructions are +binary upward compatible, and that also the assembly syntax is almost +identical - but unfortunately only almost. TI also took the chance to +make the syntax more orthogonal and simple. I tried to introduce +the majority of these changes also into the 7000's instruction set: +\begin{itemize} +\item{It is valid to use the more common \tty{\#} sign for immediate addressing + instead of the percent sign.} +\item{If a port address (\tty{P...}) is used as source or destination in a + \tty{AND, BTJO, BTJZ, MOV, OR}, or \tty{XOR} instruction, it is not necessary + to use the mnemonic variant with an appended \tty{P} - the general + form is sufficient.} +\item{The prefixed \tty{@} sign for absolute or B-relative addressing may be + omitted.} +\item{Instead of \tty{CMPA, CMP} with \tty{A} as target may be written.} +\item{Instead of \tty{LDA} resp. \tty{STA}, one can simply use the + \tty{MOV} instruction with \tty{A} as source resp.
destination.} +\item{One can write \tty{MOVW} instead of \tty{MOVD}.} +\item{It is valid to abbreviate \tty{RETS} resp. \tty{RETI} as \tty{RTS} + resp. \tty{RTI}.} +\item{\tty{TSTA} resp. \tty{TSTB} may be written as \tty{TST A} resp. + \tty{TST B}.} +\item{\tty{XCHB B} is an alias for \tty{TSTB}.} +\end{itemize} +An important note: these variants are only allowed for the TMS70Cxx - +the corresponding 7000 variants are not allowed for the 370 series! + +%%--------------------------------------------------------------------------- + +\section{TMS370xxx} + +Though these processors do not have specialized instructions for bit +manipulation, the assembler creates (with the help of the \tty{DBIT} +instruction - see there) the illusion as if single bits were +addressable. To achieve this, the \tty{DBIT} instructions stores an +address along with a bit position into an integer symbol which may +then be used as an argument to the pseudo instructions \tty{SBIT0, SBIT1, +CMPBIT, JBIT0}, and \tty{JBIT1}. These are translated into the instructions +\tty{OR, AND, XOR, BTJZ}, and \tty{BTJO} with an appropriate bit mask. + +There is nothing magic about these bit symbols, they are simple +integer values that contain the address in their lower and the bit +position in their upper half. One could construct bit symbols +without the \tty{DBIT} instruction, like this: +\begin{verbatim} +defbit macro name,bit,addr +name equ addr+(bit<<16) + endm +\end{verbatim} +but this technique would not lead to the \tty{EQU}-style syntax defined by +TI (the symbol to be defined replaces the label field in a line). +\bb{CAUTION!} Though \tty{DBIT} allows an arbitrary address, the pseudo +instructions can only operate with addresses either in the range from +0..255 or 1000h..10ffh. The processor does not have an absolute +addressing mode for other memory ranges... 
+ +%%--------------------------------------------------------------------------- + +\section{MSP430} +\label{MSPSpec} + +The MSP was designed to be a RISC processor with a minimal power +consumption. The set of machine instructions was therefore reduced +to the absolute minimum (RISC processors do not have a microcode ROM +so every additional instruction has to be implemented with additional +silicon that increases power consumption). A number of instructions +that are hardwired for other processors are therefore emulated with +other instructions. For AS, these instructions are defined in the +include file \tty{REGMSP.INC}. You will get error messages for more than +half of the instructions defined by TI if you forget to include this +file! + +%%--------------------------------------------------------------------------- + +\section{COP8 \& SC/MP} +\label{COP8Spec} + +National unfortunately also decided to use the syntax well known from +IBM mainframes (and much hated by me..) to write non-decimal integer +constants. Just like with other processors, this does not work with +AS's parser. ASMCOP however fortunately also seems to allow the C +syntax, which is why this became the default for the COP series and +the SC/MP... + +%%--------------------------------------------------------------------------- + +\section{SC144xxx} +\label{SC144xxspec} + +Originally, National offered a relatively simple assembler for this series +of DECT controllers. A much more powerful assembler has been announced +by IAR, but it is not available up to now. However, since the development +tools made by IAR are as much target-independent as possible, one can +roughly estimate the pseudo instructions it will support by looking at +other available target platforms. With this in mind, the (few) +SC144xx-specific instructions {\tt DC, DC8, DW16, DS, DS8, DS16, DW} were +designed.
Of course, I didn't want to reinvent the wheel for pseudo +instructions whose functionality is already part of the AS core. +Therefore, here is a little table with equivalences. The statements +\tty{ALIGN, END, ENDM, EXITM, MACRO, ORG, RADIX, SET,} and \tty{REPT} both +exist for the IAR assembler and AS and have the same functionality. Changes +are needed for the following instructions: + +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|} +\hline +IAR & AS & function\\ +\hline +\hline +\tty{\#include} & \tty{include} & include file \\ +\tty{\#define} & \tty{SET, EQU} & define symbol \\ +\tty{\#elif, ELIF, ELSEIF} & \tty{ELSEIF} & start another \\ + & & IF branch \\ +\tty{\#else, ELSE} & \tty{ELSE} & last branch of an IF \\ + & & construct \\ +\tty{\#endif, ENDIF} & \tty{ENDIF} & ends an IF construct \\ +\tty{\#error} & \tty{ERROR, FATAL} & create error message \\ +\tty{\#if, IF} & \tty{IF} & start an IF construct \\ +\tty{\#ifdef} & \tty{IFDEF} & symbol defined ? \\ +\tty{\#ifndef} & \tty{IFNDEF} & symbol not defined ? \\ +\tty{\#message} & \tty{MESSAGE} & output message \\ +\tty{=, DEFINE, EQU} & \tty{=, EQU} & fixed value assignment \\ +\tty{EVEN} & \tty{ALIGN 2} & force PC to be even \\ +\tty{COL, PAGSIZ} & \tty{PAGE} & set page size for listing \\ +\tty{ENDR} & \tty{ENDM} & end REPT construct \\ +\tty{LSTCND, LSTOUT} & \tty{LISTING} & control amount of listing \\ +\tty{LSTEXP, LSTREP} & \tty{MACEXP} & list expanded macros? \\ +\tty{LSTXRF} & \verb!! & generate cross reference \\ +\tty{PAGE} & \tty{NEWPAGE} & new page in listing \\ +\tty{REPTC} & \tty{IRPC} & repetition with character \\ + & & replacement \\ +\hline +\end{tabular}\end{center} +\end{table*} + +There is no direct equivalent for {\tt CASEON}, {\tt CASEOFF,} +\tty{LOCAL}, \tty{LSTPAG}, \tty{\#undef,} and {\tt REPTI}. + +A 100\% equivalent is of course impossible as long as there is no C-like +preprocessor in AS. C-like comments unfortunately are also impossible +at the moment.
Caution: When modifying IAR codes for AS, do not forget to +move converted preprocessor statements out of column 1 as AS reserves this +column exclusively for labels! + +%%--------------------------------------------------------------------------- + +\section{75K0} +\label{75K0Spec} + +Similar to other processors, the assembly language of the 75 series +also knows pseudo bit operands, i.e. it is possible to assign a +combination of address and bit number to a symbol that can then be +used as an argument for bit oriented instructions just like explicit +expressions. The following three instructions for example generate +the same code: +\begin{verbatim} +ADM sfr 0fd8h +SOC bit ADM.3 + + skt 0fd8h.3 + skt ADM.3 + skt SOC +\end{verbatim} +AS distinguishes direct and symbolic bit accesses by the missing dot +in symbolic names; it is therefore forbidden to use dots in symbol +names to avoid misunderstandings in the parser. + +The storage format of bit symbols mostly accepts the binary coding in +the machine instructions themselves: 16 bits are used, and there is +a ''long'' and a ''short'' format. The short format can store the +following variants: +\begin{itemize} +\item{direct accesses to the address range from 0FBxH to 0FFxH} +\item{indirect accesses in the style of \tty{Addr.@L} (0FC0H $\leq$ \tty{Addr} $\leq$0FFFH)} +\item{indirect accesses in the style of \tty{@H+d4.bit}} +\end{itemize} +The upper byte is set to 0, the lower byte contains the bit +expression coded according to \cite{NEC75}. The long format in contrast +only knows direct addressing, but it can cover the whole address space +(given a correct setting of MBS and MBE). A long expression stores +bits 0..7 of the address in the lower byte, the bit position in bits +8 and 9, and a constant value of 01 in bits 10 and 11. The highest +bits allow to distinguish easily between long and short addresses via +a check if the upper byte is 0. 
Bits 12..15 contain bits 8..11 of +the address; they are not needed to generate the code, but they have +to be stored somewhere as the check for correct banking can only +take place when the symbol is actually used. + +%%--------------------------------------------------------------------------- + +\section{78K0} +\label{78K0Spec} + +NEC uses different ways to mark absolute addressing in its data +books: +\begin{itemize} +\item{absolute short: no prefix} +\item{absolute long: prefix of \tty{!}} +\item{PC relative: prefix of \tty{\$}} +\end{itemize} +Under AS, these prefixes are only necessary if one wants to force a +certain addressing mode and the instruction allows different +variants. Without a prefix, AS will automatically select the shortest +variant. It should therefore rarely be necessary to use a prefix in +practice. + +%%--------------------------------------------------------------------------- + +\section{$\mu$PD772x} + +Both the 7720 and 7725 are provided by the same code generator and are +extremely similar in their instruction set. One should however not +believe that they are binary compatible: To get space for the longer +address fields and additional instructions, the bit positions of some +fields in the instruction word have changed, and the instruction length +has changed from 23 to 24 bits. The code format therefore uses different +header ids for both CPUs. + +They both have in common that in addition to the code and data segment, +there is also a ROM for storage of constants. In the case of AS, it is +mapped onto the \tty{ROMDATA} segment! + +%%=========================================================================== + +\cleardoublepage +\chapter{File Formats} + +In this chapter, the formats of files AS generates shall be explained +whose formats are not self-explanatory.
+ +%%--------------------------------------------------------------------------- + +\section{Code Files} +\label{SectCodeFormat} + +The format for code files generated by the assembler must be able to +separate code parts that were generated for different target +processors; therefore, it is a bit different from most other formats. +Though the assembler package contains tools to deal with code files, +I think it is a question of good style to describe the format in short: + +If a code file contains multibyte values, they are stored in little +endian order. This rule is already valid for the 16-bit magic word +\$1489, i.e. every code file starts with the byte sequence \$89/\$14. + +This magic word is followed by an arbitrary number of ''records''. A +record may either contain a continuous piece of the code or certain +additional information. Even without switching to different +processor types, a file may contain several code-containing records, +in case that code or constant data areas are interrupted by reserved +memory areas that should not be initialized. This way, the assembler +tries to keep the file as short as possible. + +Common to all records is a header byte which defines the record's type +and its contents. Written in a PASCALish way, the record structure +can be described in the following way: +\begin{verbatim} +FileRecord = RECORD CASE Header:Byte OF + $00:(Creator:ARRAY[] OF Char); + $01.. + $7f:(StartAdr : LongInt; + Length : Word; + Data : ARRAY[0..Length-1] OF Byte); + $80:(EntryPoint:LongInt); + $81:(Header : Byte; + Segment : Byte; + Gran : Byte; + StartAdr : LongInt; + Length : Word; + Data : ARRAY[0..Length-1] OF Byte); + END +\end{verbatim} +This description does not express fully that the length of data +fields is variable and depends on the value of the \tty{Length} entries. + +A record with a header byte of \$81 is a record that may contain code +or data from arbitrary segments.
The first byte (\tty{Header}) describes +the processor family the following code resp. data was generated for (see +table \ref{TabHeader}). +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|c|l||c|l|} +\hline +header & family & header & family \\ +Header & Familie & Header & Familie \\ +\hline +\hline +\input{../doc_DE/tabids.tex} +\end{tabular}\end{center} +\caption{Header Bytes for the Different Processor + Families\label{TabHeader}} +\end{table*} +The \tty{Segment} field signifies the address space the following code +belongs to. The assignment defined in table \ref{TabSegments} applies. +\begin{table*}[htbp] +\begin{center}\begin{tabular}{|c|l||c|l|} +\hline +number & segment & number & segment \\ +\hline +\hline +\$00 & $<$undefined$>$ & \$01 & \tty{CODE} \\ +\$02 & \tty{DATA} & \$03 & \tty{IDATA} \\ +\$04 & \tty{XDATA} & \$05 & \tty{YDATA} \\ +\$06 & \tty{BDATA} & \$07 & \tty{IO} \\ +\$08 & \tty{REG} & \$09 & \tty{ROMDATA} \\ +\hline +\end{tabular}\end{center} +\caption{Codings of the {\tt Segment} Field\label{TabSegments}} +\end{table*} +The \tty{Gran} field describes the code's ''granularity'', i.e. the size of +the smallest addressable unit in the following set of data. This +value is a function of processor type and segment and is an important +parameter for the interpretation of the following two fields that +describe the block's start address and its length: While the start +address refers to the granularity, the \tty{Length} value is always +expressed in bytes! For example, if the start address is \$300 and +the length is 12, the resulting end address would be \$30b for a +granularity of 1, however \$303 for a granularity of 4! Granularities +that differ from 1 are rare and mostly appear in DSP CPU's that are +not designed for byte processing. For example, a DSP56K's address +space is organized in 64 Kwords of 16 bits. 
The resulting storage +capacity is 128 Kbytes, however it is organized as $2^{16}$ words that +are addressed with addresses 0,1,2,...65535! + +The start address is always 32 bits in size, independent of the +processor family. In contrast, the length specification has only 16 +bits, i.e. a record may have a maximum length of 4+4+2+(64K-1) = +65545 bytes. + +Data records with a Header ranging from \$01 to \$7f present a shortcut +and preserve backward compatibility to earlier definitions of the +file format: in their case, the Header directly defines the processor +type, the target segment is fixed to \tty{CODE} and the granularity is +implicitly given by the processor type, rounded up to the next power +of two. AS prefers to use these records whenever data or code should +go into the \tty{CODE} segment. + +A record with a Header of \$80 defines an entry point, i.e. the +address where execution of the program should start. Such a record +is the result of an \tty{END} statement with a corresponding address as +argument. + +The last record in a file bears the Header \$00 and has only a string +as data field. This string does not have an explicit length +specification; its end is equal to the file's end. The string +contains only the name of the program that created the file and has +no further meaning. + +%%--------------------------------------------------------------------------- + +\section{Debug Files} +\label{SectDebugFormat} + +Debug files may optionally be generated by AS. They deliver important +information for tools used after assembly, like disassemblers or +debuggers. AS can generate debug files in one of three formats: On the +one hand, the object format used by the AVR tools from Atmel respectively +a NoICE-compatible command file, and on the other hand an own format. The +first two are described in detail in \cite{AVRObj} resp. 
the NoICE +documentations, which is why the following description limits itself to +the AS-specific MAP format: + +The information in a MAP file is split into three groups: +\begin{itemize} +\item{symbol table} +\item{memory usage per section} +\item{machine addresses of source lines} +\end{itemize} +The last item is listed first in the file. A single entry in this +list consists of two numbers that are separated by a \tty{:} character: +\begin{verbatim} + <line number>:<address>
+\end{verbatim} +Such an entry states that the machine code generated for the source +statement in a certain line is stored at the mentioned address +(written in hexadecimal notation). With such information, a +debugger can display the corresponding source lines while stepping +through a program. As a program may consist of several include +files, and due to the fact that a lot of processors have more than +one address space (though admittedly only one of them is used to +store executable code), the entries described above have to be +sorted. AS does this sorting in two levels: The primary sorting +criterion is the target segment, and the entries in one of these +sections are sorted according to files. The sections resp. +subsections are separated by special lines in the style of +\begin{verbatim} +Segment +\end{verbatim} +resp. +\begin{verbatim} +File . +\end{verbatim} +The source line info is followed by the symbol table. Similar to the +source line info, the symbol table is primarily sorted by the +segments individual symbols are assigned to. In contrast to the +source line info, an additional section \tty{NOTHING} exists which contains +the symbols that are not assigned to any specific segment (e.g. +symbols that have been defined with a simple \tty{EQU} statement). A +section in the symbol table is started with a line of the following +type: +\begin{verbatim} +Symbols in Segment +\end{verbatim} +The symbols in a section are sorted according to the alphabetical +order of their names, and one symbol entry consists of exactly one +line. Such a line consists of 5 fields which are separated by at +least a single space: + +The first field is the symbol's name, possibly extended by a section +number enclosed in brackets. Such a section number limits the +range of validity for a symbol. The second field designates the +symbol's type: \tty{Int} stands for integer values, \tty{Float} for floating +point numbers, and \tty{String} for character arrays.
The third field +finally contains the symbol's value. If the symbol contains a +string, it is necessary to use a special encoding for control +characters and spaces. Without such a coding, spaces in a string +could be misinterpreted as delimiters to the next field. AS uses the +same syntax that is also valid for assembly source files: Instead of +the character, its ASCII value with a leading backslash (\verb!\!) is +inserted. For example, the string +\begin{verbatim} + This is a test +\end{verbatim} +becomes +\begin{verbatim} + This\032is\032a\032test . +\end{verbatim} +The numerical value always has three digits and has to be interpreted +as a decimal value. Naturally, the backslash itself also has to be +coded this way. + +The fourth field specifies - if available - the size of the data +structure placed at the address given by the symbol. A debugger may +use this information to automatically display variables in their +correct length when they are referred symbolically. In case AS does +not have any information about the symbol size, this field simply +contains the value -1. + +Finally, the fifth field states via the values 0 or 1 if the symbol +has been used during assembly. A program that reads the symbol table +can use this field to skip unused symbols as they are probably unused +during the following debugging/disassembly session. + +The third section in a debug file describes the program's sections in +detail. The need for such a detailed description arises from the +sections' ability to limit the validity range of symbols. A symbolic +debugger for example cannot use certain symbols for a reverse +translation, depending on the current PC value. It may also have to +regard priorities for symbol usage when a value is represented by +more than one symbol.
The definition of a section starts with a line +of the following form: +\begin{verbatim} +Info for Section nn ssss pp +\end{verbatim} +\tty{nn} specifies the section's number (the number that is also used in +the symbol table as a postfix for symbol names), \tty{ssss} gives its name +and \tty{pp} the number of its parent section. The last information is +needed by a retranslator to step upward through a tree of sections +until a fitting symbol is found. This first line is followed by a +number of further lines that describe the code areas used by this +section. Every single entry (exactly one entry per line) either +describes a single address or an address range given by a lower and +an upper bound (separation of lower and upper bound by a minus sign). +These bounds are ''inclusive'', i.e. the bounds themselves also belong +to the area. It is important to note that an area belonging to a +section is not additionally listed for the section's parent sections +(an exception is of course a deliberate multiple allocation of address +areas, but you would not do this, would you?). On the one hand, this +allows an optimized storage of memory areas during assembly. On the +other hand, this should not be an obstacle for symbol backtranslation +as the single entry already gives an unambiguous entry point for the +symbol search path. The description of a section is ended by an +empty line or the end of the debug file. + +Program parts that lie out of any section are not listed separately. +This implicit ''root section'' carries the number -1 and is also used +as parent section for sections that do not have a real parent +section. + +It is possible that the file contains empty lines or comments (semi +colon at line start). A program reading the file has to ignore such +lines.
+ +%%=========================================================================== + +\cleardoublepage +\chapter{Utility Programs} +\label{ChapTools} + +To simplify the work with the assembler's code format a bit, I added +some tools to aid processing of code files. These programs are +released under the same license terms as stated in section +\ref{SectLicense}! + +Common to all programs are the possible return codes they may deliver +upon completion (see table \ref{TabToolReturns}). +\par +\begin{table*}[h] +\begin{center}\begin{tabular}{|c|l|} +\hline +return code & error condition \\ +\hline +\hline +0 & no errors \\ +1 & error in command line parameters \\ +2 & I/O error \\ +3 & file format error \\ +\hline +\end{tabular}\end{center} +\caption{Return Codes of the Utility Programs\label{TabToolReturns}} +\end{table*} +Just like AS, all programs take their input from STDIN and write +messages to STDOUT (resp. error messages to STDERR). Therefore, +input and output redirections should not be a problem. + +In case that numeric or address specifications have to be given in +the command line, they may also be written in hexadecimal notation +when they are prefixed with a dollar character or a \tty{0x} like in C. +(e.g. \verb!$10! or \verb!0x10! instead of 16). + +Unix shells however \marginpar{{\em UNIX}} assign a special meaning to the +dollar sign, which makes it necessary to escape a dollar sign with a +backslash. The \tty{0x} variant is definitely more comfortable in this case. + +Otherwise, calling conventions and variations are equivalent to those +of AS (except for PLIST and AS2MSG); i.e. it is possible to store +frequently used parameters in an environment variable (whose name is +constructed by appending CMD to the program's name, i.e. \tty{BINDCMD} for +BIND), to negate options, and to use all upper- resp. lower-case +writing (for details on this, see section \ref{SectCallConvention}). 
+ +Address specifications always relate to the granularity of the +processor currently in question; for example, on a PIC, an address +difference of 1 means a word and not a byte. + +%%--------------------------------------------------------------------------- + +\section{PLIST} + +PLIST is the simplest one of the five programs supplied: its purpose +is simply to list all records that are stored in a code file. As the +program does not do very much, calling is quite simple: +\begin{verbatim} + PLIST +\end{verbatim} +The file name will automatically be extended with the extension \tty{P} if +it doesn't already have one. + +\bb{CAUTION!} At this place, no wildcards are allowed! If there is a +necessity to list several files with one command, use the following +''mini batch'': +\begin{verbatim} + for %n in (*.p) do plist %n +\end{verbatim} +PLIST prints the code file's contents in a table style, whereby +exactly one line will be printed per record. The individual rows +have the following meanings: +\begin{itemize} +\item{code type: the processor family the code has been generated for.} +\item{start address: absolute memory address that expresses the load + destination for the code.} +\item{length: length of this code chunk in bytes.} +\item{end address: last address of this code chunk. This address + is calculated as start address+length-1.} +\end{itemize} +All outputs are in hexadecimal notation. + +Finally, PLIST will print a copyright remark (if there is one in the +file), together with a summaric code length. + +Simply said, PLIST is a sort of DIR for code files. One can use it +to examine a file's contents before one continues to process it. + +%%--------------------------------------------------------------------------- + +\section{BIND} + +BIND is a program that allows to concatenate the records of several +code files into a single file. A filter function is available that +can be used to copy only records of certain types. 
Used in this way, +BIND can also be used to split a code file into several files. + +The general syntax of BIND is +\begin{verbatim} + BIND [options] +\end{verbatim} +Just like AS, BIND regards all command line arguments that do not +start with a \tty{+, -} or \tty{/} as file specifications, of which the last one +must designate the destination file. All other file specifications +name sources, which may again contain wildcards. + +Currently, BIND defines only one command line option: +\begin{itemize} +\item{\tty{f $<$Header[,Header]$>$}: sets a list of record headers that should + be copied. Records with other header IDs will + not be copied. Without such an option, all + records will be copied. The headers given in + the list correspond to the \tty{HeaderID} field of the + record structure described in section \ref{SectCodeFormat}. + Individual headers in this list are separated + with commas.} +\end{itemize} +For example, to filter all MCS-51 code out of a code file, use BIND +in the following way: +\begin{verbatim} + BIND -f $31 +\end{verbatim} +If a file name misses an extension, the extension \tty{P} will be added +automatically. + +%%--------------------------------------------------------------------------- + +\section{P2HEX} + +P2HEX is an extension of BIND. It has all command line options of BIND and +uses the same conventions for file names. In contrary to BIND, the +target file is written as a Hex file, i.e. as a sequence of lines +which represent the code as ASCII hex numbers. 
+ +P2HEX knows 8 different target formats, which can be selected via the +command line parameter \tty{F}: +\begin{itemize} +\item{Motorola S-Records \tty{(-F Moto)}} +\item{MOS Hex \tty{(-F MOS)}} +\item{Intel Hex (Intellec-8, \tty{-F Intel)}} +\item{16-Bit Intel Hex (MCS-86, \tty{-F Intel16)}} +\item{32-Bit Intel Hex \tty{(-F Intel32)}} +\item{Tektronix Hex \tty{(-F Tek)}} +\item{Texas Instruments DSK \tty{(-F DSK)}} +\item{Atmel AVR Generic (-F Atmel, see \cite{AVRObj})} +\end{itemize} +If no target format is explicitly specified, P2HEX will automatically +choose one depending on the processor type: S-Records for Motorola +CPUs, Hitachi, and TLCS-900, MOS for 65xx/MELPS, DSK for the 16 bit +signal processors from Texas, Atmel Generic for the AVRs, and Intel Hex +for the rest. Depending on the start address's width, the S-Record +format will use Records of type 1, 2, or 3, however, records in one +group will always be of the same type. This automatism can be partially +suppressed via the command line option +\begin{verbatim} + -M <1|2|3> +\end{verbatim} +A value of 2 resp. 3 assures that S records with a minimum type of 2 +resp. 3 will be used, while a value of 1 corresponds to the full +automatism. + +The Intel, MOS and Tektronix formats are limited to 16 bit addresses, the +16-bit Intel format reaches 4 bits further. Addresses that are too long +for a given format will be reported by P2HEX with a warning; afterwards, +they will be truncated (!). + +For the PIC microcontrollers, the switch +\begin{verbatim} +-m <0..3> +\end{verbatim} +allows to generate the three different variants of the Intel Hex +format. Format 0 is INHX8M which contains all bytes in a +Lo-Hi-Order. Addresses become twice as large because the PICs have +a word-oriented address space that increments addresses only by one +per word. This format is also the default. With Format 1 (INHX16M), +bytes are stored in their natural order. 
This is the format +Microchip uses for its own programming devices. Format 2 (INHX8L) +resp. 3 (INHX8H) split words into their lower resp. upper bytes. +With these formats, P2HEX has to be called twice to get the complete +information, like in the following example: +\begin{verbatim} + p2hex test -m 2 + rename test.hex test.obl + p2hex test -m 3 + rename test.hex test.obh +\end{verbatim} +For the Motorola format, P2HEX additionally uses the S5 record type +mentioned in \cite{CPM68K}. This record contains the number of data +records (S1/S2/S3) to follow. As some programs might not know how to +deal with this record, one can suppress it with the option +\begin{verbatim} + +5 . +\end{verbatim} +In case a source file contains code records for different processors, +the different hex formats will also show up in the target file - it +is therefore strongly advisable to use the filter function. + +Apart from this filter function, P2HEX also supports an address +filter, which is useful to split the code into several parts (e.g. +for a set of EPROMs): +\begin{verbatim} +-r - +\end{verbatim} +The start address is the first address in the window, and the end +address is the last address in the window, \bb{not} the first address +that is out of the window. For example, to split an 8051 program +into 4 2764 EPROMs, use the following commands: +\begin{verbatim} +p2hex eprom1 -f $31 -r $0000-$1fff +p2hex eprom2 -f $31 -r $2000-$3fff +p2hex eprom3 -f $31 -r $4000-$5fff +p2hex eprom4 -f $31 -r $6000-$7fff +\end{verbatim} +By default, the address window is 32 Kbytes large and starts at +address 0. + +\bb{CAUTION!} This type of splitting does not change the absolute +addresses that will be written into the files! If the addresses in +the individual hex files should rather start at 0, one can force this +with the additional switch +\begin{verbatim} + -a . +\end{verbatim} +On the other hand, to move the addresses to a different location, one may +use the switch +\begin{verbatim} + -R . 
+\end{verbatim} +The value given is an {\em offset}, i.e. it is added to the addresses +given in the code file. +\par +A special value for start and stop address arguments is a single +dollar sign (\tty{\$}). This stands for the very first resp. last address +that has been used in the code file. So, if you want to be sure +that always the whole program is stored in the hex file, set the +address filter +\begin{verbatim} + -r $-$ +\end{verbatim} +and you do not have to worry about address filters any more. Dollar +signs and fixed addresses may of course be mixed. For example, the +setting +\begin{verbatim} + -r $-$7fff +\end{verbatim} +limits the upper end to 32 Kbytes. + +By using an offset, it is possible to move a file's contents to an +arbitrary position. This offset is simply appended to a file's name, +surrounded with parentheses. For example, if the code in a file +starts at address 0 and you want to move it to address 1000 hex in the +hex file, append \tty{(\$1000)} to the file's name (without spaces!). + +As the TI DSK format has the ability to distinguish between data and +code, there is a switch +\begin{verbatim} + -d - +\end{verbatim} +to designate the address range that should be written as data instead +of code. For this option, single dollar signs are \bb{not} allowed! While +this switch is only relevant for the DSK format, the option +\begin{verbatim} + -e
+\end{verbatim} +is also valid for the Intel and Motorola formats. Its purpose is to +set the entry address that will be inserted into the hex file. If +such a command line parameter is missing, P2HEX will search a +corresponding entry in the code file. If even this fails, no entry +address will be written to the hex file (DSK/Intel) or the field +reserved for the entry address will be set to 0 (Motorola). + +Unfortunately, one finds different statements about the last line of +an Intel-Hex file in literature. Therefore, P2HEX knows three +different variants that may be selected via the command-line +parameter \tty{i} and an additional number: +\begin{verbatim} + 0 :00000001FF + 1 :00000001 + 2 :0000000000 +\end{verbatim} +By default, variant 0 is used which seems to be the most common one. + +If the target file name does not have an extension, an extension of +\tty{HEX} is supposed. + +By default, P2HEX will print a maximum of 16 data bytes per line, +just as most other tools that output Hex files. If you want to +change this, you may use the switch +\begin{verbatim} +-l . +\end{verbatim} +The allowed range of values goes from 2 to 254 data bytes; odd values +will implicitly be rounded down to an even count. + +In most cases, the temporary code files generated by AS are not of +any further need after P2HEX has been run. The command line option +\begin{verbatim} +-k +\end{verbatim} +allows to instruct P2HEX to erase them automatically after +conversion. + +In contrast to BIND, P2HEX will not produce an empty target file if +only one file name (i.e. the target name) has been given. Instead, +P2HEX will use the corresponding code file. Therefore, a minimal +call in the style of +\begin{verbatim} + P2HEX +\end{verbatim} +is possible, to generate \tty{$<$name$>$.hex} out of \tty{$<$name$>$.p}. 
+ +%%--------------------------------------------------------------------------- + +\section{P2BIN} + +P2BIN works similar to P2HEX and offers the same options (except for +the a and i options that do not make sense for binary files), +however, the result is stored as a simple binary file instead of a +hex file. Such a file is for example suitable for programming an +EPROM. + +P2BIN knows three additional options to influence the resulting binary +file: +\begin{itemize} +\item{\tty{l $<$8 bit number$>$}: sets the value that should be used to fill + unused memory areas. By default, the value + \$ff is used. This value assures that every + half-way intelligent EPROM burner will skip + these areas. This option allows to set different values, + for example if you want to + generate an image for the EPROM versions of + MCS-48 microcontrollers (empty cells of their + EPROM array contain zeroes, so \$00 would be + the correct value in this case).} +\item{\tty{s}: commands the program to calculate a checksum + of the binary file. This sum is printed as + a 32-bit value, and the two's complement of + the least significant byte will be stored in + the file's last byte. This way, the modulus- + 256-sum of the file will become zero.} +\item{\tty{m}: is designed for the case that a CPU with a + 16- or 32-bit data bus is used and the file + has to be split for several EPROMs. The + argument may have the following values: + \begin{itemize} + \item{\tty{ALL}: copy everything} + \item{\tty{ODD}: copy all bytes with an odd address} + \item{\tty{EVEN}: copy all bytes with an even address} + \item{\tty{BYTE0..BYTE3}: copy only bytes with an address of + 4n+0 .. 4n+3} + \item{\tty{WORD0, WORD1}: copy only the lower resp. upper 16- + bit word of a 32-bit word} + \end{itemize}} +\end{itemize} +To avoid confusions: If you use this option, the resulting binary file +will become smaller because only a part of the source will be copied. 
+Therefore, the resulting file will be smaller by a factor of 2 or 4 +compared to \tty{ALL}. This is just natural... + +In case the code file does not contain an entry address, one may set +it via the \tty{-e} command line option just like with P2HEX. Upon +request, P2BIN prepends the resulting image with this address. The +command line option +\begin{verbatim} +-S +\end{verbatim} +activates this function. It expects a numeric specification ranging +from 1 to 4 as parameter which specifies the length of the address +field in bytes. This number may optionally be prepended with a \tty{L} or +\tty{B} letter to set the endian order of the address. For example, the +specification \tty{B4} generates a 4 byte address in big endian order, +while a specification of \tty{L2} or simply \tty{2} creates a 2 byte address +in little endian order. + +%%--------------------------------------------------------------------------- + +\section{AS2MSG} + +AS2MSG is not a tool in the real sense, it is a filter that was +designed to simplify the work with the assembler for (fortunate) +users of Borland Pascal 7.0. The DOS IDEs feature a 'tools' menu +that can be extended with own programs like AS. The filter allows to +directly display the error messages paired with a line +specification delivered by AS in the editor window. A new entry has +to be added to the tools menu to achieve this (Options/Tools/New). +Enter the following values: +\begin{verbatim} + - Title: ~m~acro assembler + - Program path: AS + - Command line: + -E !1 $EDNAME $CAP MSG(AS2MSG) $NOSWAP $SAVE ALL + - assign a hotkey if wanted (e.g. Shift-F7) +\end{verbatim} +The -E option assures that Turbo Pascal will not become puzzled by +STDIN and STDERR. + +I assume that AS and AS2MSG are located in a directory listed in the +\tty{PATH} variable. After pressing the appropriate hotkey (or selecting +AS from the tools menu), AS will be called with the name of the file +loaded in the active editor window as parameter. 
The error messages +generated during assembly are redirected to a special window that +allows to browse through the errors. \tty{Ctrl-Enter} jumps to an +erroneous line. The window additionally contains the statistics AS +prints at the end of an assembly. These lines obtain the dummy line +number 1. + +\tty{TURBO.EXE} (Real Mode) and \tty{BP.EXE} (Protected Mode) may be used for +this way of working with AS. I recommend however BP, as this version +does not have to 'swap' half of the DOS memory before AS is +called. + +%%=========================================================================== +\appendix + +\cleardoublepage +\chapter{Error Messages of AS} +\label{ChapErrMess} + +Here is a list of all error messages emitted by AS. Each error message is +described by: +\begin{itemize} +\item{the internal error number (it is displayed only if AS is started with the + \tty{-n} option)} +\item{the text of the error message} +\item{error type: + \begin{itemize} + \item{Warning: informs the user that a possible error was + found, or that some inefficient binary code + could be generated. The assembly process is not + stopped.} + \item{Error: an error was detected. The assembly process + continues, but no binary code is emitted.} + \item{Fatal: unrecoverable error. The assembly process is + terminated.} + \end{itemize}} +\item{reason of the error: the situation originating the error.} +\item{argument: a further explanation of the error message.} +\end{itemize} + +\par + +\newcommand{\errentry}[5] + {\item[#1]{#2 + \begin{description} + \item[Type:]{\ \\#3} + \item[Reason:]{\ \\#4} + \item[Argument:]{\ \\#5} + \end{description}} + } + +\begin{description} +\errentry{ 0}{useless displacement} + {warning} + {680x0, 6809 and COP8 CPUs: an address displacement of 0 was + given. 
An address expression without displacement is + generated, and a convenient number of NOPs are emitted + to avoid phasing errors.} + {none} +\errentry{ 10}{short addressing possible} + {warning} + {680x0-, 6502 and 68xx CPUs: a given memory location can be + reached using short addressing. A short addressing + instruction is emitted, together with the required + number of NOPs to avoid phasing errors.} + {none} +\errentry{ 20}{short jump possible} + {warning} + {680x0- and 8086 CPUs can execute jumps using a short or long + displacement. If a shorter jump was not explicitly + requested, in the + first pass room for the long jump is reserved. Then the code + for the shorter jump is emitted, and the remaining space is + filled with NOPs to avoid phasing errors.} + {none} +\errentry{ 30}{no sharefile created, SHARED ignored} + {warning} + {A \tty{SHARED} directive was found, but on the command line no + options were specified, to generate a shared file.} + {none} +\errentry{ 40}{FPU possibly cannot read this value ($>$=1E1000)} + {warning} + {The BCD-floating point format used by the 680x0-FPU + allows such a large exponent, but according to the latest + databooks, this cannot be fully interpreted. The + corresponding word is assembled, but the associated + function is not expected to produce the correct result.} + {none} +\errentry{ 50}{privileged instruction} + {warning} + {A Supervisor-mode directive was used, that was not preceded + by an explicit \tty{SUPMODE ON} directive} + {none} +\errentry{ 60}{distance of 0 not allowed for short jump (NOP created instead)} + {warning} + {A short jump with a jump distance equal to 0 is not allowed + by 680x0 resp. COP8 processors, since the associated code word is + used to identify long jump instruction. 
Instead of a + jump instruction, AS emits a NOP} + {none} +\errentry{ 70}{symbol out of wrong segment} + {warning} + {The symbol used as an operand comes from an address space + that cannot be addressed together with the given instruction} + {none} +\errentry{ 75}{segment not accessible} + {warning} + {The symbol used as an operand belongs to an address space + that cannot be accessed with any of the segment registers of + the 8086} + {The name of the inaccessible segment} +\errentry{ 80}{change of symbol values forces additional pass} + {warning} + {A symbol changed value, with respect to previous pass. This + warning is emitted only if the \tty{-r} option is used.} + {name of the symbol that changed value.} +\errentry{ 90}{overlapping memory usage} + {warning} + {The analysis of the usage list shows that part of the + program memory was used more than once. The reason can be an + excessive usage of \tty{ORG} directives.} + {none} +\errentry{ 100}{none of the CASE conditions was true} + {warning} + {A \tty{SWITCH...CASE} directive without \tty{ELSECASE} clause was + executed, and none of the \tty{CASE} conditions was found + to be true.} + {none} +\errentry{ 110}{page might not be addressable} + {warning} + {The symbol used as an operand was not found in the memory + page defined by an \tty{ASSUME} directive (ST6, 78(C)10).} + {none} +\errentry{ 120}{register number must be even} + {warning} + {The CPU allows to concatenate only register pairs, whose + start address is even (RR0, RR2, ..., only for Z8).} + {none} +\errentry{ 130}{obsolete instruction, usage discouraged} + {warning} + {The instruction used, although supported, was superseded by + a new instruction. 
Future versions of the CPU could no more + implement the old instruction.} + {none} +\errentry{ 140}{unpredictable execution of this instruction} + {warning} + {The addressing mode used for this instruction is allowed, + however a register is used in such a way that its contents + cannot be predicted after the execution of the + instruction.} + {none} +\errentry{ 150}{localization operator senseless out of a section} + {warning} + {An aheaded \@ must be used, so that it is + explicitly referred to the local symbols used in the + section. When the operator is used out of a section, there + are no local symbols, because this operator is useless in + this context.} + {none} +\errentry{ 160}{senseless instruction} + {warning} + {The instruction used has no meaning, or it can be + substituted by an other instruction, shorter and more + rapidly executed.} + {none} +\errentry{ 170}{unknown symbol value forces additional pass} + {warning} + {AS expects a forward definition of a symbol, i.e. a symbol + was used before it was defined. A further pass must be + executed. This warning is emitted only if the \tty{-r} option was + used.} + {none} +\errentry{ 180}{address is not properly aligned} + {warning} + {An address was used that is not an exact multiple of the + operand size. Although the CPU databook forbids this, the + address could be stored in the instruction word, so AS + simply emits a warning.} + {none.} +\errentry{ 190}{I/O-address must not be used here} + {warning} + {The addressing mode or the address used are correct, but the + address refers to the peripheral registers, and it + cannot be used in this circumstance.} + {none.} +\errentry{ 200}{possible pipelining effects} + {warning} + {A register is used in a series of instructions, so that a + sequence of instructions probably does not generate the + desired result. 
This usually happens when a register is + used before its new content was effectively loaded in it.} + {the register probably causing the problem.} +\errentry{ 210}{multiple use of address register in one instruction} + {warning} + {A register used for the addressing is used once more in the + same instruction, in a way that results in a modification + of the register value. The resulting address does not have a + well defined value.} + {the register used more than once.} +\errentry{ 220}{memory location is not bit addressable} + {warning} + {Via a \tty{SFRB} statement, it was tried to declare a memory cell + as bit addressable which is not bit addressable due to the + 8051's architectural limits.} + {none} +\errentry{ 230}{stack is not empty} + {warning} + {At the end of a pass, a stack defined by the program is + not empty.} + {the name of the stack and its remaining depth} +\errentry{ 240}{NUL character in string, result is undefined} + {warning} + {A string constant contains a NUL character. Though this + works with the Pascal version, it is a problem for the + C version of AS since C itself terminates strings with + a NUL character. i.e. the string would have its end for + C just at this point...} + {none} +\errentry{ 250}{instruction crosses page boundary} + {warning} + {The parts of a machine statement partially lie on + different pages. As the CPU's instruction counter does + not get incremented across page boundaries, the processor + would fetch at runtime the first byte of the old page + instead of the instruction's following byte; the program + would execute incorrectly.} + {none} +\errentry{ 260}{range overflow} + {warning} + {A numeric value was out of the allowed range. 
AS brought + the value back into the allowed range by truncating upper + bits, but it is not guaranteed that meaningful and correct + code is generated by this.} + {none} +\errentry{ 270}{negative argument for DUP} + {warning} + {The repetition argument of a DUP directive was smaller + than 0. Analogous to a count of exactly 0, no data is + stored.} + {none} +\errentry{1000}{symbol double defined} + {error} + {A new value is assigned to a symbol, using a label or a + \tty{EQU, PORT, SFR, LABEL, SFRB} or \tty{BIT} instruction: however this + can be done only using \tty{SET/EVAL}.} + {the name of the offending symbol, and the line number where + it was defined for the first time, according to the symbol + table.} +\errentry{1010}{symbol undefined} + {error} + {A symbol is still not defined in the symbol table, also + after a second pass.} + {the name of the undefined symbol.} +\errentry{1020}{invalid symbol name} + {error} + {A symbol does not fulfill the requirements that symbols + must have to be considered valid by AS. 
Please pay + attention that more stringent syntax rules exist for + macros and function parameters.} + {the wrong symbol} +\errentry{1090}{invalid format} + {error} + {The instruction format used does not exist for this + instruction.} + {the known formats for this command} +\errentry{1100}{useless attribute} + {error} + {The instruction (processor or pseudo) cannot be used with a + point-suffixed attribute.} + {none} +\errentry{1105}{attribute may only be one character long} + {error} + {The attribute following a point after an instruction must + not be longer or shorter than one character.} + {none} +\errentry{1110}{wrong number of operands} + {error} + {The number of arguments issued for the instruction (processor or + pseudo) does not conform with the accepted number of + operands.} + {none} +\errentry{1115}{wrong number of operations} + {error} + {The number of options given with this command is not + correct.} + {none} +\errentry{1120}{addressing mode must be immediate} + {error} + {The instruction can be used only with immediate operands + (preceded by \tty{\#}).} + {none} +\errentry{1130}{invalid operand size} + {error} + {Although the operand is of the right type, it does not have + the correct length (in bits).} + {none} +\errentry{1131}{conflicting operand sizes} + {error} + {The operands used have different length (in bits)} + {none} +\errentry{1132}{undefined operand size} + {error} + {It is not possible to estimate, from the opcode and from + the operands, the size of the operand (a trouble with + 8086 assembly). 
You must define it with a \tty{BYTE or WORD} + \tty{PTR} prefix.} + {none} +\errentry{1135}{invalid operand type} + {error} + {an expression does not have a correct operand type + (integer/\-decimal/\-string)} + {the operand type} +\errentry{1140}{too many arguments} + {error} + {No more than 20 arguments can be given to any instruction} + {none} +\errentry{1200}{unknown opcode} + {error} + {An opcode was used that is neither an AS instruction, nor a + known mnemonic for the current processor type.} + {none} +\errentry{1300}{number of opening/closing brackets does not match} + {error} + {The expression parser found an expression enclosed by + parentheses, where the number of opening and closing + parentheses does not match.} + {the wrong expression} +\errentry{1310}{division by 0} + {error} + {An expression on the right side of a division or modulus + operation was found to be equal to 0.} + {none} +\errentry{1315}{range underflow} + {error} + {An integer word underflowed the allowed range.} + {the value of the word and the allowed minimum (in most + cases, maybe I will complete this one day...)} +\errentry{1320}{range overflow} + {error} + {An integer word overflowed the allowed range.} + {the value of the word, and the allowed maximum (in most + cases, maybe I will complete this one day...)} +\errentry{1325}{address is not properly aligned} + {error} + {The given address does not correspond with the size needed + by the data transfer, i.e. it is not an integral multiple of + the operand size. 
Not all processor types can use unaligned + data.} + {none} +\errentry{1330}{distance too big} + {error} + {The displacement used for an address is too large.} + {none} +\errentry{1340}{short addressing not allowed} + {error} + {The address of the operand is outside of the address space + that can be accessed using short-addressing mode.} + {none} +\errentry{1350}{addressing mode not allowed here} + {error} + {the addressing mode used, although usually possible, + cannot be used here.} + {none} +\errentry{1351}{number must be even} + {error} + {At this point, only even addresses are allowed, since the + low order bit is used for other purposes or it is reserved.} + {none} +\errentry{1355}{addressing mode not allowed in parallel operation} + {error} + {The addressing mode(s) used are allowed in sequential, + but not in parallel instructions} + {none} +\errentry{1360}{undefined condition} + {error} + {The branch condition used for a conditional jump does not + exist.} + {none} +\errentry{1370}{jump distance too big} + {error} + {the jump instruction and destination are too apart to + execute the jump with a single step} + {none} +\errentry{1375}{jump distance is odd} + {error} + {Since instruction must only be located at even addresses, + the jump distance between two instructions must always be + even, and the LSB of the jump distance is used otherwise. + This issue was not verified here. The reason is usually the + presence of an odd number of data in bytes or a wrong + \tty{ORG}.} + {none} +\errentry{1380}{invalid argument for shifting} + {error} + {only a constant or a data register can be used for defining + the shift size. 
(only for 680x0)} + {none} +\errentry{1390}{operand must be in range 1..8} + {error} + {constants for shift size or \tty{ADDQ} argument can be only + within the 1..8 range (only for 680x0)} + {none} +\errentry{1400}{shift amplitude too big} + {error} + {(no more used)} + {none} +\errentry{1410}{invalid register list} + {error} + {The register list argument of \tty{MOVEM} or \tty{FMOVEM} has a + wrong format (only for 680x0)} + {none} +\errentry{1420}{invalid addressing mode for CMP} + {error} + {The operand combination used with the \tty{CMP} instruction is + not allowed (only for 680x0)} + {none} +\errentry{1430}{invalid CPU type} + {error} + {The processor type used as argument for \tty{CPU} command is + unknown to AS.} + {the unknown processor type} +\errentry{1440}{invalid control register} + {error} + {The control register used by a \tty{MOVEC} is not (yet) available + for the processor defined by the \tty{CPU} command.} + {none} +\errentry{1445}{invalid register} + {error} + {The register used, although valid, cannot be used in this + context.} + {none} +\errentry{1450}{RESTORE without SAVE} + {error} + {A \tty{RESTORE} command was found, that cannot be coupled with a + corresponding \tty{SAVE}.} + {none} +\errentry{1460}{missing RESTORE} + {error} + {After the assembling pass, a \tty{SAVE} command was missing.} + {none.} +\errentry{1465}{unknown macro control instruction} + {error} + {A macro option parameter is unknown to AS.} + {the dubious option.} +\errentry{1470}{missing ENDIF/ENDCASE} + {error} + {after the assembling, some of the \tty{IF}- or \tty{CASE}- constructs + were found without the closing command} + {none} +\errentry{1480}{invalid IF-structure} + {error} + {The command structure in a \tty{IF}- or \tty{SWITCH}- sequence is + wrong.} + {none} +\errentry{1483}{section name double defined} + {error} + {In this program module a section with the same name still + exists.} + {the multiple-defined name} +\errentry{1484}{unknown section} + {error} + 
{In the current scope, there are no sections with this name} + {the unknown name} +\errentry{1485}{missing ENDSECTION} + {error} + {Not all the sections were properly closed.} + {none} +\errentry{1486}{wrong ENDSECTION} + {error} + {The given \tty{ENDSECTION} does not refer to the most + deeply nested one.} + {none} +\errentry{1487}{ENDSECTION without SECTION} + {error} + {An \tty{ENDSECTION} command was found, but the associated section + was not defined before.} + {none} +\errentry{1488}{unresolved forward declaration} + {error} + {A symbol declared with a \tty{FORWARD} or \tty{PUBLIC} statement could + not be resolved.} + {the name of the unresolved symbol.} +\errentry{1489}{conflicting FORWARD $<->$ PUBLIC-declaration} + {error} + {A symbol was defined both as public and private.} + {the name of the symbol.} +\errentry{1490}{wrong numbers of function arguments} + {error} + {The number of arguments used for referencing a function + does not match the number of arguments defined in the + function definition.} + {none} +\errentry{1495}{unresolved literals (missing LTORG)} + {error} + {At the end of the program, or just before switching to + another processor type, unresolved literals still remain.} + {none} +\errentry{1500}{instruction not allowed on} + {error} + {Although the instruction is correct, it cannot be used with + the selected member of the CPU family.} + {none} +\errentry{1505}{addressing mode not allowed on} + {error} + {Although the addressing mode used is correct, it cannot be + used with the selected member of the CPU family.} + {none} +\errentry{1510}{invalid bit position} + {error} + {Either the number of bits specified is not allowed, or + the command is not completely specified.} + {none} +\errentry{1520}{only ON/OFF allowed} + {error} + {This pseudo command accepts as argument either \tty{ON} or + \tty{OFF}} + {none} +\errentry{1530}{stack is empty or undefined} + {error} + {It was tried to access a stack via a \tty{POPV} instruction + that 
was either never defined or already emptied.} + {the name of the stack in question} +\errentry{1540}{not exactly one bit set} + {error} + {Not exactly one bit was set in a mask passed to the + \tty{BITPOS} function.} + {none} +\errentry{1550}{ENDSTRUCT without STRUCT} + {error} + {An \tty{ENDSTRUCT} instruction was found though there is + currently no structure definition in progress.} + {none} +\errentry{1551}{open structure definition} + {error} + {After end of assembly, not all \tty{STRUCT} instructions + have been closed with appropriate \tty{ENDSTRUCT}s.} + {the innermost, unfinished structure definition} +\errentry{1552}{wrong ENDSTRUCT} + {error} + {the name parameter of an \tty{ENDSTRUCT} instruction does + not correspond to the innermost open structure + definition.} + {none} +\errentry{1553}{phase definition not allowed in structure definition} + {error} + {What should I say about that? \tty{PHASE} inside a record + simply does not make sense and only leads to + confusion...} + {none} +\errentry{1554}{invalid \tty{STRUCT} directive} + {error} + {Only \tty{EXTNAMES} resp. 
\tty{NOEXTNAMES} are allowed as + directives of a \tty{STRUCT} statement.} + {the unknown directive} +\errentry{1600}{unexpected end of file} + {error} + {It was tried to read past the end of a file with a + \tty{BINCLUDE} statement.} + {none} +\errentry{1700}{ROM-offset must be in range 0..63} + {error} + {The ROM table of the 680x0 coprocessor has only 64 entries.} + {none} +\errentry{1710}{invalid function code} + {error} + {The only function code arguments allowed are SFC, DFC, a + data register, or a constant in the interval of 0..15 (only + for 680x0 MMU).} + {none} +\errentry{1720}{invalid function code mask} + {error} + {Only a number in the interval 0..15 can be used as + function code mask (only for 680x0 MMU)} + {none} +\errentry{1730}{invalid MMU register} + {error} + {The MMU does not have a register with this name (only for + 680x0 MMU).} + {none} +\errentry{1740}{level must be in range 0..7} + {error} + {The level for \tty{PTESTW} and \tty{PTESTR} must be a constant in the + range of 0...7 (only for 680x0 MMU).} + {none} +\errentry{1750}{invalid bit mask} + {error} + {The bit mask used for a bit field command has a wrong + format (only for 680x0).} + {none} +\errentry{1760}{invalid register pair} + {error} + {The register here defined cannot be used in this context, + or there is a syntactic error (only for 680x0).} + {none} +\errentry{1800}{open macro definition} + {error} + {An incomplete macro definition was found. Probably an + \tty{ENDM} was forgotten.} + {none} +\errentry{1805}{EXITM not called from within macro} + {error} + {\tty{EXITM} is designed to terminate a macro expansion. 
This + instruction only makes sense within macros and an attempt + was made to call it in the absence of macros.} + {none} +\errentry{1810}{more than 10 macro parameters} + {error} + {A macro cannot have more than 10 parameters} + {none} +\errentry{1815}{macro double defined} + {error} + {A macro was defined more than once in a program section.} + {the multiply defined macro name.} +\errentry{1820}{expression must be evaluatable in first pass} + {error} + {The command used has an influence on the length of the + emitted code, so that forward references cannot be resolved + here.} + {none} +\errentry{1830}{too many nested IFs} + {error} + {(no more implemented)} + {none} +\errentry{1840}{ELSEIF/ENDIF without IF} + {error} + {A \tty{ELSEIF}- or \tty{ENDIF}- command was found, that is not preceded + by an \tty{IF}- command.} + {none} +\errentry{1850}{nested / recursive macro call} + {error} + {(no more implemented)} + {none} +\errentry{1860}{unknown function} + {error} + {The function invoked was not defined before.} + {The name of the unknown function} +\errentry{1870}{function argument out of definition range} + {error} + {The argument does not belong to the allowed argument range + associated to the referenced function.} + {none} +\errentry{1880}{floating point overflow} + {error} + {Although the argument is within the range allowed to the + function arguments, the result is not valid} + {none} +\errentry{1890}{invalid value pair} + {error} + {The base-exponent pair used in the expression cannot be + computed} + {none} +\errentry{1900}{instruction must not start on this address} + {error} + {No jumps can be performed by the selected CPU from this + address.} + {none} +\errentry{1905}{invalid jump target} + {error} + {No jumps can be performed by the selected CPU to this + address.} + {none} +\errentry{1910}{jump target not on same page} + {error} + {Jump command and destination must be in the same memory + page.} + {none} +\errentry{1920}{code overflow} + {error} + 
{An attempt was made to generate more than 1024 code or + data bytes in a single memory page.} + {none} +\errentry{1925}{address overflow} + {error} + {The address space for the processor type actually used was + filled beyond the maximum allowed limit.} + {none} +\errentry{1930}{constants and placeholders cannot be mixed} + {error} + {Instructions that reserve memory, and instructions that define + constants cannot be mixed in a single pseudo instruction.} + {none} +\errentry{1940}{code must not be generated in structure definition} + {error} + {a \tty{STRUCT} construct is only designed to describe a + data structure and not to create one; therefore, no + instructions are allowed that generate code.} + {none} +\errentry{1950}{parallel construct not possible here} + {error} + {Either these instructions cannot be executed in parallel, + or they are not close enough each other, to do parallel + execution.} + {none} +\errentry{1960}{invalid segment} + {error} + {The referenced segment cannot be used here.} + {The name of the segment used.} +\errentry{1961}{unknown segment} + {error} + {The segment referenced with a \tty{SEGMENT} command does not + exist for the CPU used.} + {The name of the segment used} +\errentry{1962}{unknown segment register} + {error} + {The segment referenced here does not exist (8086 only)} + {none} +\errentry{1970}{invalid string} + {error} + {The string has an invalid format.} + {none} +\errentry{1980}{invalid register name} + {error} + {The referenced register does not exist, or it cannot + be used here.} + {none} +\errentry{1985}{invalid argument} + {error} + {The command used cannot be performed with the \tty{REP}-prefix.} + {none} +\errentry{1990}{indirect mode not allowed} + {error} + {Indirect addressing cannot be used in this way} + {none} +\errentry{1995}{not allowed in current segment} + {error} + {(no more implemented)} + {none} +\errentry{1996}{not allowed in maximum mode} + {error} + {This register can be used only in minimum 
mode} + {none} +\errentry{1997}{not allowed in minimum mode} + {error} + {This register can be used only in maximum mode} + {none} +\errentry{2000}{invalid combination of prefixes} + {error} + {The prefix combination here defined is not allowed, or it + cannot be translated into binary code} + {none} +\errentry{2010}{invalid escape sequence} + {error} + {The special character defined using a backslash sequence + is not defined} + {none} +\errentry{10001}{error in opening file} + {fatal} + {An error was detected while trying to open a file for input.} + {description of the I/O error} +\errentry{10002}{error in writing listing} + {fatal} + {An error happened while AS was writing the listing file.} + {description of the I/O error} +\errentry{10003}{file read error} + {fatal} + {An error was detected while reading a source file.} + {description of the I/O error} +\errentry{10004}{file write error} + {fatal} + {While AS was writing a code or share file, an error happened.} + {description of the I/O error} +\errentry{10006}{heap overflow} + {fatal} + {The memory available is not enough to store all the data + needed by AS. Try using the DPMI or OS/2 version of AS.} + {none} +\errentry{10007}{stack overflow} + {fatal} + {The program stack crashed, because too complex formulas, or + a bad disposition of symbols and/or macros were used. Try + again, using AS with the option \tty{-A}.} + {none} +\end{description} + +%%=========================================================================== + +\cleardoublepage +\chapter{I/O Error Messages} + +The following error messages are generated not only by AS, but also by +the auxiliary programs, like PLIST, BIND, P2HEX, and P2BIN. Only the most +probable error messages are here explained. Should you meet an undocumented +error message, then you probably met a program bug! Please inform us +immediately about this!! 
+ +\begin{description} +\item[2]{file not found\\ + The file requested does not exist, or it is stored on another + drive.} +\item[3]{path not found\\ + The path of a file does not exist, or it is on another drive.} +\item[4]{too much open files\\ + There are no more file handles available to DOS. Increase + their number changing the value associated to \tty{FILES=} in the file + \tty{CONFIG.SYS}.} +\item[5]{file access not allowed\\ + Either the network access rights do not allow the file access, or + an attempt was done to rewrite or rename a protected file.} +\item[6]{invalid file handler} +\item[12]{invalid access mode} +\item[15]{invalid drive letter\\ + The required drive does not exist.} +\item[16]{The file cannot be deleted} +\item[17]{RENAME cannot be done on this drive} +\item[100]{Unexpected end of file\\ + A file access tried to go beyond the end of file, although according + to its structure this should not happen. The file is probably + corrupted.} +\item[101]{disk full\\ + This is self explaining! Please, clean up !} +\item[102]{ASSIGN failed} +\item[103]{file not open} +\item[104]{file not open for reading} +\item[105]{file not open for writing} +\item[106]{invalid numerical format} +\item[150]{the disk is write-protected\\ + When you don't use a hard disk as work medium storage, you should + sometimes remove the protecting tab from your diskette!} +\item[151]{unknown device\\ + you tried to access a peripheral unit that is unknown to DOS. This + should not usually happen, since the name should be automatically + interpreted as a filename.} +\item[152]{drive not ready\\ + close the disk drive door.} +\item[153]{unknown DOS function} +\item[154]{invalid disk checksum\\ + A bad read error on the disk. Try again; if nothing changes, + reformat the floppy disk resp. begin to take care of your hard + disk!} +\item[155]{invalid FCB} +\item[156]{position error\\ + the diskette/hard disk controller has not found a disk track. See + nr. 
154 !} +\item[157]{format unknown\\ + DOS cannot read the diskette format} +\item[158]{sector not found\\ + As nr. 156, but the controller this time could not find a disk + sector in the track.} +\item[159]{end of paper\\ + You probably redirected the output of AS to a printer. Assembler + printout can be veeery long...} +\item[160]{device read error\\ + The operating system detected an unclassifiable read error} +\item[161]{device write error\\ + The operating system detected an unclassifiable write error} +\item[162]{general failure error\\ + The operating system has absolutely no idea of what happened to the + device.} +\end{description} + +%%=========================================================================== + +\cleardoublepage +\chapter{Frequently Asked Questions} + +In this chapter, I tried to collect some questions that arise very often +together with their answers. Answers to the problems presented in +this chapter might also be found at other places in this manual, but +one maybe does not find them immediately... + +\begin{description} +\item[Q:]{I am fed up with DOS. Are there versions of AS for other + operating systems ?} +\item[A:]{Apart from the protected mode version that offers more memory when + working under DOS, ports exist for OS/2 and Unix systems like + Linux (currently in test phase). Versions that help operating + system manufacturers located in Redmond to become even richer are + currently not planned. I will gladly make the sources of AS + available for someone else who wants to become active in this + direction. The C variant is probably the best way to start a + port into this direction. He should however not expect support + from me that goes beyond the sources themselves...} +\vspace{0.3cm} +\item[Q:]{Is a support of the XYZ processor planned for AS?} +\item[A:]{New processors are appearing all the time and I am trying to keep + pace by extending AS.
The stack on my desk labeled ''undone'' + however never goes below the 4 inch watermark... Wishes coming + from users of course play an important role in the decision which + candidates will be done first. The internet and the rising amount + of documentation published in electronic form make the acquisition + of data books easier than it used to be, but it always becomes + difficult when more exotic or older architectures are wanted. If + the processor family in question is not in the list of families + that are planned (see chapter 1), adding a data book to a request + will have a highly positive influence. Borrowing books is also + fine.} +\vspace{0.3cm} +\item[Q:]{Having a free assembler is really fine, but I now also had use for + a disassembler...and a debugger...a simulator would also really be + cool!} +\item[A:]{AS is a project I work on in leisure time, the time I have when I + do not have to care of how to make my living. AS already takes a + significant portion of that time, and sometimes I make a time-out + to use my soldering iron, enjoy a Tangerine Dream CD, watch TV, or + simply to fulfill some basic human needs... I once started to + write the concept of a disassembler that was designed to create + source code that can be assembled and that automatically + separates code and data areas. I quickly stopped this project + again when I realized that the remaining time simply did not + suffice. I prefer to work on one good program than to struggle for + half a dozen of mediocre apps. Regarded that way, the answer to + the question is unfortunately ''no''...} +\vspace{0.3cm} +\item[Q:]{The screen output of AS is messed up with strange characters, e.g. + arrows and brackets. Why?} +\item[A:]{AS will by default use some ANSI control sequences for screen + control. These sequences will appear unfiltered on your screen + if you did not install an ANSI driver. 
Either install an ANSI + driver or use the DOS command \tty{SET USEANSI=N} to turn the + sequences off.} +\vspace{0.3cm} +\item[Q:]{AS suddenly terminates with a stack overflow error while + assembling my program. Did my program become too large?} +\item[A:]{Yes and No. Your program's symbol table has grown a bit + unsymmetrically, which led to high recursion depths while accessing + the table. Errors of this type especially happen in the + 16-bit-OS/2 version of AS which has a very limited stack area. + Restart AS with the \tty{-A} command line switch. If this does not + help, overly complex formula expressions are also a possible cause of + stack overflows. In such a case, try to split the formula into + intermediate steps.} +\vspace{0.3cm} +\item[Q:]{It seems that AS does not assemble my program up to the end. It + worked however with an older version of AS (1.39).} +\item[A:]{Newer versions of AS no longer ignore the \tty{END} statement; they + actually terminate assembly when an \tty{END} is encountered. + Especially older include files made by some users tended to + contain an \tty{END} statement at their end. Simply remove the + superfluous \tty{END} statements.} +\vspace{0.3cm} +\item[Q:]{I made an assembly listing of my program because I had some more + complicated assembly errors in my program. Upon closer + investigation of the listing, I found that some branches do not + point to the desired target but instead to themselves!} +\item[A:]{This effect happens in case of forward jumps in the first pass. + The formula parser does not yet have the target address in its symbol + table, and as it is a completely independent module, it has to think of + a value that does not even hurt relative branches with short displacement + lengths. This is the current program counter itself...in the + second pass, the correct values would have appeared, but the second + pass did not happen due to errors in the first one.
Correct the + other errors first so that AS gets into the second pass, and the + listing should look more meaningful again.} +\vspace{0.3cm} +\item[Q:]{Assembly of my program works perfectly, however I get an empty + file when I try to convert it with P2HEX or P2BIN.} +\item[A:]{You probably did not set the address filter correctly. This + filter by default cuts out an area ranging from 0 to 32 Kbytes. + If your program contains memory chunks outside this range, they + will be ignored. If your code is completely beyond the 32K + barrier (this is commonplace for processors of the 65xx and 68xx + series), you will get the result you just described. Simply + set the address filter to a range that suits your needs (see the + chapter dealing with P2BIN/P2HEX).} +\vspace{0.3cm} +\item[Q:]{I cannot enter the dollar character when using P2BIN or P2HEX + under Unix. The automatic address range setting does not work, instead + I get strange error messages.} +\item[A:]{Unix shells use the dollar character for expansion of shell + variables. If you want to pass a dollar character to an application, + prefix it with a backslash (\verb!\!). In the special case of the + address range specification for P2HEX and P2BIN, you may also use + \tty{0x} instead of the dollar character, which removes this problem + completely.} +\end{description} + +%%=========================================================================== + +\cleardoublepage +\chapter{Pseudo-Instructions Collected} + +This appendix is designed as a quick reference to look up all pseudo +instructions provided by AS. The list is ordered in two parts: The +first part lists the instructions that are always available, and this +list is followed by lists that enumerate the instructions +additionally available for a certain processor family. + +\subsubsection{Instructions that are always available} +\input{../doc_DE/pscomm.tex} +There is an additional \tty{SET} resp.
\tty{EVAL} instruction (in case +\tty{SET} is already a machine instruction). + +\input{../doc_DE/pscpu.tex} + +%%=========================================================================== + +\cleardoublepage +\chapter{Predefined Symbols} +\label{AppInternSyms} + +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & data type & definition & meaning \\ +\hline +\hline +ARCHITECTURE & string & predef. & target platform AS was \\ + & & & compiled for, in the style \\ + & & & processor-manufacturer- \\ + & & & operating system \\ +BIGENDIAN & boolean & dyn.(0) & storage of constants MSB \\ + & & & first ? \\ +CASESENSITIVE & boolean & normal & case sensitivity in symbol \\ + & & & names ? \\ +CONSTPI & float & normal & constant Pi (3.1415.....) \\ +DATE & string & predef. & date of begin of assembly \\ +FALSE & boolean & predef. & 0 = logically ''false'' \\ +HASFPU & boolean & dyn.(0) & coprocessor instructions \\ + & & & enabled ? \\ +HASPMMU & boolean & dyn.(0) & MMU instructions enabled ? \\ +INEXTMODE & boolean & dyn.(0) & XM flag set for 4 Gbyte \\ + & & & address space ? \\ +INLWORDMODE & boolean & dyn.(0) & LW flag set for 32 bit \\ + & & & instructions ? \\ +INMAXMODE & boolean & dyn.(0) & processor in maximum \\ + & & & mode ? \\ +INSUPMODE & boolean & dyn.(0) & processor in supervisor \\ + & & & mode ? \\ +\hline +\end{tabular}\end{center} +\caption{Predefined Symbols - Part 1\label{TabInternSyms1}} +\end{table*} + +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & data type & definition & meaning \\ +\hline\hline +INSRCMODE & boolean & dyn.(0) & processor in source mode ? \\ +FULLPMMU & boolean & dyn.(0/1) & full PMMU instruction set \\ + & & & allowed ? \\ +LISTON & boolean & dyn.(1) & listing enabled ? \\ +MACEXP & boolean & dyn.(1) & expansion of macro con- \\ + & & & structs in listing enabled ? \\ +MOMCPU & integer & dyn. 
& number of target CPU \\ + & & (68008) & currently set \\ +MOMCPUNAME & string & dyn. & name of target CPU \\ + & & (68008) & currently set \\ +MOMFILE & string & special & current source file \\ + & & & (including include files) \\ +MOMLINE & integer & special & current line number in \\ + & & & source file \\ +MOMPASS & integer & special & number of current pass \\ +MOMSECTION & string & special & name of current section or \\ + & & & empty string if out of any \\ + & & & section \\ +MOMSEGMENT & string & special & name of address space \\ + & & & currently selected \\ + & & & with \tty{SEGMENT} \\ + & & & \\ +\hline +\end{tabular}\end{center} +\caption{Predefined Symbols - Part 2\label{TabInternSyms2}} +\end{table*} + +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|l|l|} +\hline +name & data type & definition & meaning \\ +\hline\hline +PADDING & boolean & dyn.(1) & pad byte field to even \\ + & & & count ? \\ +RELAXED & boolean & dyn.(0) & any syntax allowed integer \\ + & & & constants ? \\ +PC & integer & special & curr. program counter \\ + & & & (Thomson) \\ +TIME & string & predef. & time of begin of assembly \\ + & & & (1. pass) \\ +TRUE & integer & predef. & 1 = logically ''true'' \\ +VERSION & integer & predef. & version of AS in BCD \\ + & & & coding, e.g. 1331 hex for \\ + & & & version 1.33p1 \\ +WRAPMODE & Integer & predef. & shortened program counter \\ + & & & assumed? \\ +* & integer & special & curr. program counter \\ + & & & (Motorola, Rockwell, Micro- \\ + & & & chip, Hitachi) \\ +\$ & integer & special & curr. program counter (Intel, \\ + & & & Zilog, Texas, Toshiba, NEC, \\ + & & & Siemens, AMD) \\ +\hline +\end{tabular}\end{center} +\caption{Predefined Symbols - Part 3\label{TabInternSyms3}} +\end{table*} + +To be exact, boolean symbols are just ordinary integer symbols with the +difference that AS will assign only two different values to them (0 or 1, +corresponding to False or True). 
AS does not store special symbols +in the symbol table. For performance reasons, they are realized with +hardcoded comparisons directly in the parser. They therefore do not +show up in the assembly listing's symbol table. Predefined symbols +are only set once at the beginning of a pass. The values of dynamic +symbols may in contrast change during assembly as they reflect +settings made with related pseudo instructions. The values added in +parentheses give the value present at the beginning of a pass. + +The names given in this table also reflect the valid way to reference +these symbols in case-sensitive mode. + +The names listed here should be avoided for own symbols; either one +can define but not access them (special symbols), or one will receive +an error message due to a double-defined symbol. The ugliest case is +when the redefinition of a symbol made by AS at the beginning of a +pass leads to a phase error and an infinite loop... + +%%=========================================================================== + +\cleardoublepage +\chapter{Shipped Include Files} + +The distribution of AS contains a couple of include files. Apart from +include files that only refer to a specific processor family (and whose +function should be immediately clear to someone who works with this +family), there are a few processor-independent files which include useful +functions. The functions defined in these files shall be explained +briefly in the following sections: + +\section{BITFUNCS.INC} + +This file defines a couple of bit-oriented functions that might be +hardwired for other assemblers. 
In the case of AS however, they are +implemented with the help of user-defined functions: + +\begin{itemize} +\item{{\em mask(start,bits)} returns an integer with {\em bits} bits set + starting at position {\em start};} +\item{{\em invmask(start,bits)} returns one's complement to {\em + mask()};} +\item{{\em cutout(x,start,bits)} returns {\em bits} bits masked out from + {\em x} starting at position {\em start} without shifting them to + position 0;} +\item{{\em hi(x)} returns the second lowest byte (bits 8..15) of {\em + x};} +\item{{\em lo(x)} returns the lowest byte (bits 0..7) of {\em x};} +\item{{\em hiword(x)} returns the second lowest word (bits 16..31) of + {\em x};} +\item{{\em loword(x)} returns the lowest word (bits 0..15) of {\em x};} +\item{{\em odd(x)} returns TRUE if {\em x} is odd;} +\item{{\em even(x)} returns TRUE if {\em x} is even;} +\item{{\em getbit(x,n)} extracts bit {\em n} out of {\em x} and returns + it as 0 or 1;} +\item{{\em shln(x,size,n)} shifts a word {\em x} of length {\em size} to + the left by {\em n} places;} +\item{{\em shrn(x,size,n)} shifts a word {\em x} of length {\em size} to + the right by {\em n} places;} +\item{{\em rotln(x,size,n)} rotates the lowest {\em size} bits of an + integer {\em x} to the left by {\em n} places;} +\item{{\em rotrn(x,size,n)} rotates the lowest {\em size} bits of an + integer {\em x} to the right by {\em n} places;} +\end{itemize} + +\section{CTYPE.INC} + +This include file is similar to the C include file {\tt ctype.h} which +offers functions to classify characters.
All functions deliver either +TRUE or FALSE: + +\begin{itemize} +\item{{\em isdigit(ch)} becomes TRUE if {\em ch} is a valid decimal + digit (0..9);} +\item{{\em isxdigit(ch)} becomes TRUE if {\em ch} is a valid hexadecimal + digit (0..9, A..F, a..f);} +\item{{\em isupper(ch)} becomes TRUE if {\em ch} is an uppercase + letter (excluding special national characters);} +\item{{\em islower(ch)} becomes TRUE if {\em ch} is a lowercase + letter (excluding special national characters);} +\item{{\em isalpha(ch)} becomes TRUE if {\em ch} is a letter (excluding + special national characters);} +\item{{\em isalnum(ch)} becomes TRUE if {\em ch} is either a letter or + a valid decimal digit;} +\item{{\em isspace(ch)} becomes TRUE if {\em ch} is an 'empty' character + (space, form feed, line feed, carriage return, tabulator);} +\item{{\em isprint(ch)} becomes TRUE if {\em ch} is a printable character, + i.e. no control character up to code 31;} +\item{{\em iscntrl(ch)} is the opposite to {\em isprint()};} +\item{{\em isgraph(ch)} becomes TRUE if {\em ch} is a printable and + visible character;} +\item{{\em ispunct(ch)} becomes TRUE if {\em ch} is a printable special + character (i.e. neither space nor letter nor number);} +\end{itemize} + +%%=========================================================================== + +\cleardoublepage +\chapter{Acknowledgments} + +\begin{quote}\it +''If I have seen farther than other men, \\ +it is because I stood on the shoulders of giants.'' \\ +\hspace{2cm} --Sir Isaac Newton +\rm\end{quote} +\begin{quote}\it +''If I haven't seen farther than other men, \\ +it is because I stood in the footsteps of giants.'' \\ +\hspace{2cm} --unknown +\rm\end{quote} +\par +If one decides to rewrite a chapter that has been out of date for two +years, it is almost unavoidable that one forgets to mention some of +the good ghosts who contributed to the success this project had up +to now.
The first ''thank you'' therefore goes to the people whose +names I unintentionally forgot in the following enumeration! + +The concept of AS as a universal cross assembler came from Bernhard +(C.) Zschocke who needed a ''student friendly'', i.e. free cross +assembler for his microprocessor course and talked me into extending +an already existing 68000 assembler. The rest is history... +The microprocessor course held at RWTH Aachen also always provided the +most engaged users (and bug-searchers) of new AS features and +therefore contributed a lot to today's quality of AS. + +The internet and FTP have proved to be a big help for spreading AS and +reporting of bugs. My thanks therefore go to the FTP admins (Bernd +Casimir in Stuttgart, Norbert Breidor in Aachen, and J\"urgen Mei\ss\-burger +in J\"ulich). Especially the last one personally engaged a lot to +establish a practicable way in J\"ulich. + +As we are just talking about the ZAM: Though Wolfgang E. Nagel is not +personally involved in AS, he is at least my boss and always puts +at least four eyes on what I am doing. Regarding AS, there seems to +be at least one that smiles... + +A program like AS cannot be done without appropriate data books and +documentation. I received information from an enormous number of +people, ranging from tips up to complete data books. An enumeration +follows (as stated before, without guarantee for completeness!): + +Ernst Ahlers, Charles Altmann, Rolf Buchholz, Bernd Casimir, +Gunther Ewald, Stephan Hruschka, Peter Kliegelh\"ofer, Ulf Meinke, +Matthias Paul, Norbert Rosch, Steffen Schmid, Leonhard Schneider, +Ernst Schwab, Michael Schwingen, Oliver Sellke, Christian Stelter, +Oliver Thamm, Thorsten Thiele. + +...and an ironic ''thank you'' to Rolf-Dieter-Klein and Tobias Thiel who +demonstrated with their ASM68K how one should \bb{not} do it and thereby +indirectly gave me the impulse to write something better! + +I did not entirely write AS on my own.
AS contains the OverXMS +routines from Wilbert van Leijen which can move the overlay modules +into the extended memory. A really nice library, easy to use without +problems! + +The TMS320C2x/5x code generators and the file \tty{STDDEF2x.INC} come +from Thomas Sailer, ETH Zurich. It's surprising, he only needed one +weekend to understand my coding and to implement the new code generator. +Either that was a long nightshift or I am slowly getting old... + +%%=========================================================================== + +\cleardoublepage +\chapter{Changes since Version 1.3} + +\begin{itemize} +\item{version 1.31: + \begin{itemize} + \item{additional MCS-51 processor type 80515. The number + is again only stored by the assembler. The file + \tty{STDDEF51.INC} was extended by the necessary SFRs. + \bb{CAUTION!} Some of the 80515 SFRs have moved to other + addresses!} + \item{additional support for the Z80 processor;} + \item{faster 680x0 code generator.} + \end{itemize}} +\item{version 1.32: + \begin{itemize} + \item{syntax for zero page addresses for the 65xx family + was changed from \tty{addr.z} to \tty{$<$addr} (similar to 68xx);} + \item{additional support for the 6800, 6805, 6301, and + 6811 processors;} + \item{the 8051 part now also understands \tty{DJNZ, PUSH}, and + \tty{POP} (sorry);} + \item{the assembly listing now lists not only the symbols + but also the macros that have been defined;} + \item{additional instructions \tty{IFDEF/IFNDEF} for conditional + assembly based on the existence of a symbol;} + \item{additional instructions \tty{PHASE/DEPHASE} to support code + that shall be moved at runtime to a different address;} + \item{additional instructions \tty{WARNING, ERROR}, and \tty{FATAL} to print + user-defined error messages;} + \item{the file \tty{STDDEF51.INC} additionally contains the macro + \tty{USING} to simplify working with the MCS-51's register + banks;} + \item{command line option \tty{u} to print segment usage;} +
\end{itemize}} +\item{version 1.33: + \begin{itemize} + \item{additionally supports the 6809 processor;} + \item{added string variables;} + \item{The instructions \tty{TITLE, PRTINIT, PRTEXIT, ERROR}, + \tty{WARNING}, and \tty{FATAL} now expect a string expression. + Constants therefore now have to be enclosed in + '' instead of ' characters. This is also true + for \tty{DB}, \tty{DC.B}, and \tty{BYT};} + \item{additional instruction \tty{ALIGN} to align the program + counter for Intel processors;} + \item{additional instruction \tty{LISTING} to turn the generation + of an assembly listing on or off;} + \item{additional instruction \tty{CHARSET} for user-defined + character sets.} + \end{itemize}} +\item{version 1.34: + \begin{itemize} + \item{the second pass is now omitted if there were errors + in the first pass;} + \item{additional predefined symbol \tty{VERSION} that contains + the version number of AS;} + \item{additional instruction \tty{MESSAGE} to generate additional + messages under program control;} + \item{formula parser is now accessible via string constants;} + \item{if an error in a macro occurs, additionally the line + number in the macro itself is shown;} + \item{additional function \tty{UPSTRING} to convert a string to + all upper-case.} + \end{itemize}} +\item{version 1.35: + \begin{itemize} + \item{additional function \tty{TOUPPER} to convert a single + character to upper case;} + \item{additional instruction \tty{FUNCTION} for user-defined + functions;} + \item{additional command line option \tty{D} to define symbols + from outside;} + \item{the environment variable \tty{ASCMD} for commonly used + command line options was introduced;} + \item{the program will additionally be checked for double + usage of memory areas if the u option is enabled;} + \item{additional command line option \tty{C} to generate a cross + reference list.} + \end{itemize}} +\item{version 1.36: + \begin{itemize} + \item{additionally supports the PIC16C5x and PIC17C4x + 
processor families;} + \item{the assembly listing additionally shows the nesting + depth of include files;} + \item{the cross reference list additionally shows the + definition point of a symbol;} + \item{additional command line option \tty{A} to force a more + compact layout of the symbol table.} + \end{itemize}} +\item{version 1.37: + \begin{itemize} + \item{additionally supports the processors 8086, 80186, + V30, V35, 8087, and Z180;} + \item{additional instructions \tty{SAVE} and \tty{RESTORE} for an + easier switching of some flags;} + \item{additional operators for logical shifts and bit + mirroring;} + \item{command line options may now be negated with a + plus sign;} + \item{additional filter AS2MSG for a more comfortable + work with AS under Turbo-Pascal 7.0;} + \item{\tty{ELSEIF} now may have an argument for construction + of \tty{IF\--THEN\--ELSE} ladders;} + \item{additional \tty{CASE} construct for a more comfortable + conditional assembly;} + \item{user-defined functions now may have more than one + argument;} + \item{P2HEX can now additionally generate hex files in + a format suitable for 65xx processors;} + \item{BIND, P2HEX, and P2BIN now have the same scheme + for command line processing like AS;} + \item{additional switch \tty{i} for P2HEX to select one out + three possibilities for the termination record;} + \item{additional functions \tty{ABS} and \tty{SGN};} + \item{additional predefined symbols \tty{MOMFILE} and \tty{MOMLINE};} + \item{additional option to print extended error messages;} + \item{additional instruction \tty{IFUSED} and \tty{IFNUSED} to check + whether a symbol has been used so far;} + \item{The environment variables \tty{ASCMD, BINDCMD} etc. 
now + optionally may contain the name of a file that + provides more space for options;} + \item{P2HEX can now generate the hex formats specified + by Microchip (p4);} + \item{a page length specification of 0 now allows to + suppress automatic formfeeds in the assembly listing + completely (p4);} + \item{symbols defined in the command line now may be + assigned an arbitrary value (p5).} + \end{itemize}} +\item{version 1.38: + \begin{itemize} + \item{changed operation to multipass mode. This enables + AS to generate optimal code even in case of forward + references;} + \item{the 8051 part now also knows the generic \tty{JMP} and + \tty{CALL} instructions;} + \item{additionally supports the Toshiba TLCS-900 series + (p1);} + \item{additional instruction \tty{ASSUME} to inform the assembler + about the 8086's segment register contents (p2);} + \item{additionally supports the ST6 series from + SGS-Thomson (p2);} + \item{..and the 3201x signal processors from Texas + Instruments (p2);} + \item{additional option \tty{F} for P2HEX to override the + automatic format selection (p2);} + \item{P2BIN now can automatically set the start resp. + stop address of the address window by specifying + dollar signs (p2);} + \item{the 8048 code generator now also knows the 8041/42 + instruction extensions (p2);} + \item{additionally supports the Z8 microcontrollers (p3).} + \end{itemize}} +\item{version 1.39: + \begin{itemize} + \item{additional opportunity to define sections and local + symbols;} + \item{additional command line switch \tty{h} to force hexadecimal + numbers to use lowercase;} + \item{additional predefined symbol \tty{MOMPASS} to read the + number of the currently running pass;} + \item{additional command line switch \tty{t} to disable + individual parts of the assembly listing;} + \item{additionally knows the L variant of the TLCS-900 + series and the MELPS-7700 series from Mitsubishi + (p1);} + \item{P2HEX now also accepts dollar signs as start resp. 
+ stop address (p2);} + \item{additionally supports the TLCS-90 family from + Toshiba (p2);} + \item{P2HEX now also can output data in Tektronix and + 16 bit Intel Hex format (p2);} + \item{P2HEX now prints warnings for address overflows + (p2);} + \item{additional include file \tty{STDDEF96.INC} with address + definitions for the TLCS-900 series (p3);} + \item{additional instruction \tty{READ} to allow interactive + input of values during assembly (p3);} + \item{error messages are written to the STDERR channel + instead of standard output (p3);} + \item{the \tty{STOP} instruction missing for the 6811 is now + available (scusi, p3);} + \item{additionally supports the $\mu$PD78(C)1x family from + NEC (p3);} + \item{additionally supports the PIC16C84 from NEC (p3);} + \item{additional command line switch \tty{E} to redirect error + messages to a file (p3);} + \item{The MELPS-7700's 'idol' 65816 is now also available + (p4);} + \item{the ST6 pseudo instruction \tty{ROMWIN} has been removed + was integrated into the \tty{ASSUME} instruction (p4);} + \item{additionally supports the 6804 from SGS-Thomson (p4);} + \item{via the \tty{NOEXPORT} option in a macro definition, it is + now possible to define individually for every macro + whether it shall appear in the \tty{MAC} file or not (p4);} + \item{the meaning of \tty{MACEXP} regarding the expansion of + macros has changed slightly due to the additional + \tty{NOEXPAND} option in the macro definition (p4);} + \item{The additional \tty{GLOBAL} option in the macro definition + now additionally allows to define macros that are + uniquely identified by their section name (p4).} + \end{itemize}} +\item{version 1.40: + \begin{itemize} + \item{additionally supports the DSP56000 from Motorola;} + \item{P2BIN can now also extract the lower resp. 
upper + half of a 32-bit word;} + \item{additionally supports the TLCS-870 and TLCS-47 + families from Toshiba (p1);} + \item{a prefixed \tty{!} now allows to reach machine instructions + hidden by a macro (p1);} + \item{the \tty{GLOBAL} instruction now allows to export symbols + in a qualified style (p1);} + \item{the additional \tty{r} command line switch now allows to + print a list of constructs that forced additional + passes (p1);} + \item{it is now possible to omit an argument to the \tty{E} + command line option; AS will then choose a fitting + default (p1);} + \item{the \tty{t} command line option now allows to suppress + line numbering in the assembly listing (p1);} + \item{escape sequences may now also be used in ASCII style + integer constants (p1);} + \item{the additional pseudo instruction \tty{PADDING} now allows + to enable or disable the insertion of padding bytes + in 680x0 mode (p2);} + \item{\tty{ALIGN} is now a valid instruction for all targets + (p2);} + \item{additionally knows the PIC16C64's SFRs (p2);} + \item{additionally supports the 8096 from Intel (p2);} + \item{\tty{DC} additionally allows to specify a repetition factor + (r3);} + \item{additionally supports the TMS320C2x family from Texas + Instruments (implementation done by Thomas Sailer, ETH + Zurich, r3); P2HEX has been extended appropriately;} + \item{an equation sign may be used instead of \tty{EQU} (r3);} + \item{additional \tty{ENUM} instruction to define enumerations + (r3);} + \item{\tty{END} now has a real effect (r3);} + \item{additional command line switch \tty{n} to get the internal + error numbers in addition to the error messages (r3);} + \item{additionally supports the TLCS-9000 series from + Toshiba (r4);} + \item{additionally supports the TMS370xxx series from Texas + Instruments, including a new \tty{DBIT} pseudo instruction + (r5);} + \item{additionally knows the DS80C320's SFR's (r5);} + \item{the macro processor is now also able to include files + from within 
macros. This required to modify the + format of error messages slightly. If you use + AS2MSG, replace it with the new version! (r5)} + \item{additionally supports the 80C166 from Siemens (r5);} + \item{additional \tty{VAL} function to evaluate string + expressions (r5);} + \item{it is now possible to construct symbol names with the + help of string expressions enclosed in braces (r5);} + \item{additionally knows the 80C167's peculiarities (r6);} + \item{the MELPS740's special page addressing mode is now + supported (r6);} + \item{it is now possible to explicitly reference a symbol + from a certain section by appending its name enclosed + in brackets. The construction with an \tty{@} sign has + been removed! (r6)} + \item{additionally supports the MELPS-4500 series from + Mitsubishi (r7);} + \item{additionally supports H8/300 and H8/300H series from + Hitachi (r7);} + \item{settings made with \tty{LISTING} resp. \tty{MACEXP} may now be + read back from predefined symbols with the same names + (r7);} + \item{additionally supports the TMS320C3x series from Texas + Instruments (r8);} + \item{additionally supports the SH7000 from Hitachi (r8);} + \item{the Z80 part has been extended to also support the + Z380 (r9);} + \item{the 68K part has been extended to know the + differences of the 683xx micro controllers (r9);} + \item{a label not any more has to be placed in the first + row if it is marked with a double dot (r9);} + \item{additionally supports the 75K0 series from NEC (r9);} + \item{the additional command line option o allows to set + a user-defined name for the code file (r9);} + \item{the \verb!~~! 
operator has been moved to a somewhat more sensible + ranking (r9);}
(r1);} + \item{the 65XX part now differentiates between the 65C02 + and 65SC02 (r1);} + \item{additionally to the symbol \tty{MOMCPU}, there is now also + a string symbol \tty{MOMCPUNAME} that contains the + processor's full name (r1);} + \item{P2HEX now also knows the 32-bit variant of the Intel + hex format (r1);} + \item{additionally knows the 87C750's limitations (r2);} + \item{the internal numbers for fatal errors have been moved + to the area starting at 10000, making more space for + normal error messages (r2);} + \item{unused symbols are now marked with a star in the + symbol table (r2);} + \item{additionally supports the 29K family from AMD (r2);} + \item{additionally supports the M16 family from Mitsubishi + (r2);} + \item{additionally supports the H8/500 family from Hitachi + (r3);} + \item{the number of data bytes printed per line by P2HEX + can now be modified (r3);} + \item{the number of the pass that starts to output warnings + created by the \tty{r} command line switch is now variable + (r3);} + \item{the macro processor now knows a \tty{WHILE} statement that + allows to repeat a piece of code a variable number of + times (r3);} + \item{the \tty{PAGE} instruction now also allows to set the line + with of the assembly listing (r3);} + \item{CPU aliases may now be defined to define new pseudo + processor devices (r3);} + \item{additionally supports the MCS/251 family from Intel + (r3);} + \item{if the cross reference list has been enabled, the + place of the first definition is given for double + definitions of symbols (r3);} + \item{additionally supports the TMS320C5x family from Texas + Instruments (implementation done by Thomas Sailer, + ETH Zurich, r3);} + \item{the OS/2 version should now also correctly work with + long file names. If one doesn't check every s**t + personally... 
(r3);} + \item{the new pseudo instruction \tty{BIGENDIAN} now allows to + select in MCS-51/251 mode whether constants should + be stored in big endian or little endian format (r3);} + \item{the 680x0 part now differentiates between the full + and reduced MMU instruction set; a manual toggle can + be done via the \tty{FULLPMMU} instruction (r3);} + \item{the new command line option \tty{I} allows to print a list + of all include files paired with their nesting level + (r3);} + \item{additionally supports the 68HC16 family from Motorola + (r3);} + \item{the \tty{END} statement now optionally accepts an argument + as entry point for the program (r3);} + \item{P2BIN and P2HEX now allow to move the contents of a + code file to a different address (r4);} + \item{comments appended to a \tty{SHARED} instruction are now + copied to the share file (r4);} + \item{additionally supports the 68HC12 family from Motorola + (r4);} + \item{additionally supports the XA family from Philips + (r4);} + \item{additionally supports the 68HC08 family from Motorola + (r4);} + \item{additionally supports the AVR family from Atmel (r4);} + \item{to achieve better compatibility to the AS11 from + Motorola, the pseudo instructions \tty{FCB, FDB, FCC}, and + \tty{RMB} were added (r5);} + \item{additionally supports the M16C from Mitsubishi (r5);} + \item{additionally supports the COP8 from National + Semiconductor (r5);} + \item{additional instructions \tty{IFB} and \tty{IFNB} for conditional + assembly (r5);} + \item{the new \tty{EXITM} instruction now allows to terminate a + macro expansion (r5);} + \item{additionally supports the MSP430 from Texas + Instruments (r5);} + \item{\tty{LISTING} now knows the additional variants + \tty{NOSKIPPED} and \tty{PURECODE} to remove code that + was not assembled from the listing (r5);} + \item{additionally supports the 78K0 family from NEC (r5);} + \item{\tty{BIGENDIAN} is now also available in PowerPC mode + (r5);} + \item{additional \tty{BINCLUDE} 
instruction to include binary + files (r5);} + \item{additional \tty{TOLOWER} and \tty{LOWSTRING} functions to convert + characters to lower case (r5);} + \item{it is now possible to store data in other segments + than \tty{CODE}. The file format has been extended + appropriately (r5);} + \item{the \tty{DS} instruction to reserve memory areas is now + also available in Intel mode (r5);} + \item{the \tty{U} command line switch now allows to switch AS + into a case sensitive mode that differentiates + between upper and lower case in the names of symbols, + user-defined functions, macros, macro parameters, and + sections (r5);} + \item{\tty{SFRB} now also knows the mapping rules for bit + addresses in the RAM areas; warnings are generated + for addresses that are not bit addressable (r5);} + \item{additional instructions \tty{PUSHV} and \tty{POPV} to save symbol + values temporarily (r5);} + \item{additional functions \tty{BITCNT, FIRSTBIT, LASTBIT}, and + \tty{BITPOS} for bit processing (r5);} + \item{the 68360 is now also known as a member of the CPU32 + processors (r5);} + \item{additionally supports the ST9 family from SGS-Thomson + (r6);} + \item{additionally supports the SC/MP from National + Semiconductor (r6);} + \item{additionally supports the TMS70Cxx family from Texas + Instruments (r6);} + \item{additionally supports the TMS9900 family from Texas + Instruments (r6);} + \item{additionally knows the 80296's instruction set + extensions (r6);} + \item{the supported number of Z8 derivatives has been + extended (r6);} + \item{additionally knows the 80C504's mask defects (r6);} + \item{additional register definition file for Siemens' C50x + processors (r6);} + \item{additionally supports the ST7 family from SGS-Thomson + (r6);} + \item{the Tntel pseudo instructions for data disposal are + now also valid for the 65816/MELPS-7700 (r6);} + \item{for the 65816/MELPS-7700, the address length may now + be set explicitly via prefixes (r6);} + \item{additionally 
supports the 8X30x family from Signetics + (r6);} + \item{from now on, \tty{PADDING} is enabled by default only + for the 680x0 family (r7);} + \item{the new predefined symbol \tty{ARCHITECTURE} can now be + used to query the platform AS was compiled for (r7);} + \item{additional statements \tty{STRUCT} and \tty{ENDSTRUCT} + to define data structures (r7);} + \item{hex and object files for the AVR tools may now be generated + directly (r7);} + \item{\tty{MOVEC} now also knows the 68040's control registers + (r7);} + \item{additional \tty{STRLEN} function to calculate the length + of a string (r7);} + \item{additional ability to define register symbols (r7 currently + only Atmel AVR);} + \item{additionally knows the 6502's undocumented instructions (r7);} + \item{P2HEX and P2BIN now optionally can erase the input files + automatically (r7);} + \item{P2BIN can additionally prepend the entry address to the + resulting image (r7);} + \item{additionally supports the ColdFire family from Motorola as a + variation of the 680x0 core (r7);} + \item{\tty{BYT/FCB, ADR/FDB}, and \tty{FCC} now also allow the + repetition factor known from DC (r7);} + \item{additionally supports Motorola's M*Core (r7);} + \item{the SH7000 part now also knows the SH7700's + extensions (r7);} + \item{the 680x0 part now also knows the 68040's additional + instructions (r7);} + \item{the 56K part now also knows the instruction set extensions + up to the 56300 (r7).} + \item{the new \tty{CODEPAGE} statement now allows to keep several + character sets in parallel (r8);} + \item{The argument variations for \tty{CHARSET} have been extended + (r8);} + \item{New string functions \tty{SUBSTR} and \tty{STRSTR} (r8);} + \item{additional \tty{IRPC} statement in the macro processor (r8);} + \item{additional \tty{RADIX} statement to set the default numbering + system for integer constants (r8);} + \item{instead of {\tt ELSEIF}, it is now valid to simply write {\tt + ELSE} (r8);} + \item{$==$ may be used as 
equality operator instead of $=$ (r8);} + \item{\tty{BRANCHEXT} for the Philips XA now allows to automatically + extend the reach of short branches (r8);} + \item{debug output is now also possible in NoICE format (r8);} + \item{additionally supports the i960 family from Intel (r8);} + \item{additionally supports the $\mu$PD7720/7725 signal processors + from NEC (r8);} + \item{additionally supports the $\mu$PD77230 signal processor from + NEC (r8);} + \item{additionally supports the SYM53C8xx SCSI processors from + Symbios Logic (r8);} + \item{additionally supports the 4004 from Intel (r8);} + \item{additionally supports the SC14xxx series of National (r8);} + \item{additionally supports the instruction extensions of the PPC + 403GC (r8);} + \item{additional command line option {\tt cpu} to set the default + target processor (r8);} + \item{key files now also may be referenced from the command line + (r8);} + \item{additional command line option {\tt shareout} to set the + output file for SHARED definitions (r8);} + \item{new statement {\tt WRAPMODE} to support AVR processors with + a shortened program counter (r8);} + \item{additionally supports the C20x instruction subset in the C5x + part (r8);} + \item{hexadecimal address specifications for the tools now may also + be made in C notation (r8);} + \item{the numbering system for integer results in \verb!\{...}! 
+ expressions is now configurable via \tty{OUTRADIX} (r8);} + \item{the register syntax for 4004 register pairs has been corrected + (r8);} + \item{additionally supports the F$^{2}$MC8L family from Fujitsu + (r8);} + \item{P2HEX now allows to set the minimum address length for S + record addresses (r8);} + \item{additionally supports the ACE family from Fairchild (r8);} + \item{{\tt REG} is now also allowed for PowerPCs (r8);} + \item{additional switch in P2HEX to relocate all addresses (r8);} + \item{The switch \tty{x} now additionally allows a second level + of detailness to print the source line in question (r8).} + \end{itemize}} +\end{itemize} + +%%=========================================================================== + +\cleardoublepage +\chapter{Hints for the AS Source Code} +\label{ChapSource} + +As I already mentioned in the introduction, I release the source code of +AS on request. The following shall give a few hints to their usage. + +%%--------------------------------------------------------------------------- + +\section{Language Preliminaries} + +In the beginning, AS was a program written in Turbo-Pascal. This was +roughly at the end of the eighties, and there were a couple of reasons for +this choice: First, I was much more used to it than to any C compiler, and +compared to Turbo Pascal's IDE, all DOS-based C compilers were just +crawling along. In the beginning of 1997 however, it became clear that +things had changed: One factor was that Borland had decided to let its +confident DOS developers down (once again, explicitly no 'thank you', you +boneheads from Borland!) and replaced version 7.0 of Borland Pascal with +something called 'Delphi', which is probably a wonderful tool to develop +Windows programs which consist of 90\% user interface and accidentaly a +little bit of content, however completely useless for command-line driven +programs like AS. 
Furthermore, my focus of operating systems had made a +clear move towards Unix, and I probably could have waited arbitrarily long +for a Borland Pascal for Linux (to all those remarking now that Borland +would be working on something like that: this is {\em Vapourware}, don\'t +believe them anything until you can go into a shop and actually buy it!). +It was therefore clear that C was the way to go. + +After this eperience what results the usage of 'island systems' may have, +I put a big emphasize on portability while doing the translation to C; +however, since AS for example deals with binary data in an exactly format +and uses operating systen-specific functions at some places which may need +adaptions when one compliles AS the first time for a new platform. + +AS is tailored for a C compiler that conforms to the ANSI C standard; C++ +is explicitly not required. If you are still using a compiler conforming +to the outdated Kernighan\&Ritchie standard, you should consider getting a +newer compiler: The ANSI C standard has been fixed in 1989 and there +should be an ANSI C compiler for every contemporary platform, maybe by +using the old compiler to build GNU-C. Though there are some switches in +the source code to bring it nearer to K\&R, this is not an officially +supported feature which I only use internally to support a quite antique +Unix. Everything left to say about K\&R is located in the file {\tt +README.KR}. + +The inclusion of some additional features not present in the Pascal +version (e.g. dynamically loadable message files, test suite, automatic +generation of the documentation from {\em one} source format) has made the +source tree substantially more complicated. 
I will attempt to unwire +everything step by step: + +%%--------------------------------------------------------------------------- + +\section{Capsuling System dependencies} + +As I already mentioned, As has been tailored to provide maximum platform +independence and portability (at least I believe so...). This means +packing all platform dependencies into as few files as possible. I will +describe these files now, and this section is the first one because it is +probably one of the most important: + +The Build of all components of AS takes place via a central {\tt +Makefile}. To make it work, it has to be accompanied by a fitting {\tt +Makefile.def} that gives the platform dependent settings like compiler +flags. The subdirectory {\tt Makefile.def-samples} contains a couple of +includes that work for widespread platforms (but which need not be +optimal...). In case your platform is not among them, you may take the +file {\tt Makefile.def.tmpl} as a starting point (and send me the +result!). + +A further component to capure system dependencies is the file {\tt +sysdefs.h}. Practically all compilers predefine a couple of preprocessor +symbols that describe the target processor and the used operating system. +For example, on a Sun Sparc under Solaris equipped with the GNU compiler, +the symbols \verb!__sparc! and \verb!__SVR4!. {\tt sysdefs.h} exploits +these symbols to provide a homogeneous environment for the remaining, +system-independent files. Especially, this covers integer datatypes of a +specific length, but it may also include the (re)definition of C functions +which are not present or non-standard-like on a specific platform. It's +best to read this files yourself if you like to know which things may +occur... Generally, the \verb!#ifdef! statement are ordered in two +levels: First, a specific processor platform is selected, the the +operating systems are sorted out in such a section. 
+ +If you port AS to a new platform, you have to find two symbols typical for +this platform and extend {\tt sysdefs.h} accordingly. Once again, I'm +interested in the result... + +%%--------------------------------------------------------------------------- + +\section{System-Independent Files} + +...represent the largest part of all modules. Describing all functions in +detail is beyond the scope of this description (those who want to know +more probably start studying the sources, my programming style isn't that +horrible either...), which is why I can only give a short list at this +place with all modules their function: + +\subsection{Modules Used by AS} + +\subsubsection{as.c} + +This file is AS's root: it contains the {\em main()} function of AS, the +processing of all command line options, the overall control of all passes +and parts of the macro processor. + +\subsubsection{asmallg.c} + +This module processes all statements defined for all processor targets, +e.g. \tty{EQU} and \tty{ORG}. The \tty{CPU} pseudo-op used to switch +among different processor targets is also located here. + +\subsubsection{asmcode.c} + +This module contains the bookkeping needed for the code output file. It +exports an interface that allows to open and close a code file and offers +functions to write code to (or take it back from) the file. An important +job of this module is to buffer the write process, which speeds up +execution by writing the code in larger blocks. + +\subsubsection{asmdebug.c} + +AS can optionally generate debug information for other tools like +simulators or debuggers, allowing a backward reference to the source code. +They get collected in this module and can be output after assembly in one +of several formats. + +\subsubsection{asmdef.c} + +This modules only contains declarations of constants used in different +places and global variables. + +\subsubsection{asmfnums.c} + +AS assigns internally assigns incrementing numbers for each used source +file. 
These numbers are used for quick referencing. Assignment of +numbers and the conversion between names and numbers takes place here. + +\subsubsection{asmif.c} + +Here ara ll routines located controlling conditional assembly. The most +important exported variable is a flag called \tty{IfAsm} which controls +whether code generation is currently turned on or off. + +\subsubsection{asminclist.c} + +This module holds the definition of the list stucture that allows AS to +print the nesting of include files to the assembly list file. + +\subsubsection{asmitree.c} + +When searching for the mnemonic used in a line of code, a simple linear +comparison with all available machine instructions (as it is still done in +most code generators, for reasons of simplicity and laziness) is not +necessary the most effective method. This module defines two improved +structures (binary tree and hash table) which provide a more efficient +search and are destined to replace the simple linear search on a +step-by-step basis...priorities as needed... + +\subsubsection{asmmac.c} + +Routines to store and execute macro constructs are located in this module. +The real macro processor is (as already mentioned) in {\tt as.c}. + +\subsubsection{asmpars.c} + +Here we really go into the innards: This module stores the symbol tables +(global and local) in two binary trees. Further more, there is a quite +large procedure {\tt EvalExpression} which analyzes and evaluates a (formula) +expression. The procedure returns the result (integer, floating point, or +string) in a varaint record. However, to evaluate expressions during code +generation, one should better use the functions \tty{EvalIntExpression, +EvalFloatExpression}, and \tty{EvalStringExpression}. Modifications for +tha esake of adding new target processors are unnecessary in this modules +and should be done with extreme care, since you are touching something +like 'AS's roots'. 
+ +\subsubsection{asmsub.c} + +This module collects a couple of commonly used subroutines which primarily +deal with error handling and 'advanced' string processing. + +\subsubsection{bpemu.c} + +As already mentioned at the beginning, AS originally was a program written +in Borland Pascal. For some intrinsic functions of the compiler, it was +simpler to emulate those than to touch all places in the source code where +they are used. Well... + +\subsubsection{chunks.c} + +This module defines a data type to deal with a list of address ranges. +This functionality is needed by AS for allocation lists; furthermore, +P2BIN and P2HEX use such lists to warn about overlaps. + +\subsubsection{cmdarg.c} + +This module implements the overall mechanism of command line arguments. +It needs a specification of allowed arguments, splits the command line and +triggers the appropriate callbacks. In detail, the mechanism includes +the following: +\begin{itemize} +\item{Processing of arguments located in an environment variable or + a corresponding file;} +\item{Return of a set describing which command line arguments have not + been processed;} +\item{A backdoor for situations when an overlaying IDE converts the passed + command line completely into upper or lower case.} +\end{itemize} + +\subsubsection{codepseudo.c} + +You will find at this place pseudo instructions that are used by +a subset of code generators. On the one hand, this is the Intel group of +\tty{DB..DT}, and on the other hand their counterparts for 8/16 bit CPUs +from Motorola or Rockwell. Someone who wants to extend AS by a +processor fitting into one of these groups can get the biggest part +of the necessary pseudo instructions with one call to this module. + +\subsubsection{codevars.c} + +For reasons of memory efficiency, some variables commonly used by diverse +code generators. 
+ +\subsubsection{endian.c} + +Yet another bit of machine dependence, however one you do not have to +pay attention to: This module automatically checks at startup whether +a host machine is little or big endian. Furthermore, checks are made if +the type definitions made for integer variables in {\tt sysdefs.h} really +result in the correct lengths. + +\subsubsection{headids.c} + +At this place, all processor families supported by AS are collected with +their header IDs (see chapter \ref{SectCodeFormat}) and the output format +to be used by default by P2HEX. The purpose of this table is to centralize +the addition of a new processor as much as possible, i.e. in contrast to +earlier versions of AS, no further modifications of tool sources are +necessary. + +\subsubsection{ioerrs.c} + +The conversion from error numbers to clear text messages is located here. +I hope I'll never hit a system that does not define the numbers as macros, +because I would have to rewrite this module completely... + +\subsubsection{nlmessages.c} + +The C version of AS reads all messages from files at runtime after the +language to be used is clear. The format of message files is not a simple +one, but instead a special compact and preindexed format that is generated +at build time by a program called 'rescomp' (we will talk about it later). +This module is the counterpart to rescomp that reads the correct language +part into a character field and offers functions to access the messages. + +\subsubsection{nls.c} + +This module checks which country-dependent settings (date and time format, +country code) are present at runtime. Unfortunately, this is a highly +operating system-dependent task, and currently, there are only three +methods defined: The MS-DOS method, the OS/2 method and the typical Unix +method via {\em locale} functions. For all other systems, there is +unfortunately currently only \verb!NO_NLS! available... 
+ +\subsubsection{stdhandl.c} + +On the one hand, a special open function is located here that knows the +special strings {\tt !0...!2} as file names and creates duplicates of the +standard file handles {\em stdin, stdout,} and {\em stderr}. On the other +hand, investigations are done whether the standard output has been +redirected to a device or a file. On non-Unix systems, this unfortunately +also incorporates some special operations. + +\subsubsection{stringlists.c} + +This is just a little 'hack' that defines routines to deal with linear +lists of strings, which are needed e.g. in the macro processor of AS. + +\subsubsection{strutil.c} + +Some commonly needed string operations have found their home here. + +\subsubsection{version.c} + +The currently valid version is centrally stored here for AS and all other +tools. + +\subsubsection{code????.c} + +These modules form the main part of AS: each module contains the code +generator for a specific processor family. + +\subsection{Additional Modules for the Tools} + +\subsubsection{hex.c} + +A small module to convert integer numbers to hexadecimal strings. It's +not absolutely needed in C any more (except for the conversion of {\em +long long} variables, which unfortunately not all {\tt printf()}'s +support), but it somehow survived the porting from Pascal to C. + +\subsubsection{p2bin.c} + +The sources of P2BIN. + +\subsubsection{p2hex.c} + +The sources of P2HEX. + +\subsubsection{pbind.c} + +The sources of BIND. + +\subsubsection{plist.c} + +The sources of PLIST. + +\subsubsection{toolutils.c} + +All subroutines needed by several tools are collected here, e.g. for +reading of code files. + +\section{Modules Needed During the Build of AS} + +\subsubsection{a2k.c} + +This is a minimal filter converting ANSI C source files to +Kernighan-Ritchie style. To be exact: only function heads are converted, +and even that only when they are roughly formatted like my programming style. 
+No one should therefore think this is a universal C parser! + +\subsubsection{addcr.c} + +A small filter needed during installation on DOS- or OS/2-systems. Since +DOS and OS/2 use a CR/LF for a newline, in contrast to the single LF of +Unix systems, all assembly include files provided with AS are sent through +this filter during assembly. + +\subsubsection{bincmp.c} + +For DOS and OS/2, this module takes over the task of the {\em cmp} command, +i.e. the binary comparison of files during the test run. While this would +in principle be possible with the {\em comp} command provided with the OS, +{\em bincmp} does not have any nasty interactive questions (which seem to +be an adventure to get rid of...) + +\subsubsection{findhyphen.c} + +This is the submodule in {\em tex2doc} providing hyphenation of words. +The algorithm used for this is shamelessly stolen from TeX. + +\subsubsection{grhyph.c} + +The definition of hyphenation rules for the German language. + +\subsubsection{rescomp.c} + +This is AS's 'resource compiler', i.e. the tool that converts a readable +file with string resources into a fast, indexed format. + +\subsubsection{tex2doc.c} + +A tool that converts the LaTeX documentation of AS into an ASCII format. + +\subsubsection{tex2html.c} + +A tool that converts the LaTeX documentation of AS into an HTML document. + +\subsubsection{umlaut.c and unumlaut.c} + +These tiny programs convert national special characters between their +coding in ISO8859-1 (all AS files use this format upon delivery) and their +system-specific coding. Apart from a plain ASCII7 variant, there are +currently the IBM character sets 437 and 850. + +\subsubsection{ushyph.c} + +The definition of hyphenation rules for the English language. + +%%--------------------------------------------------------------------------- + +\section{Generation of Message Files} + +As already mentioned, the C source tree of AS uses a dynamic load +principle for all (error) messages. 
In contrast to the Pascal sources +where all messages were bundled in an include file and compiled into the +programs, this method eliminates the need to provide AS in multiple +language variants; there is only one version which checks for the +language to be used at runtime and loads the corresponding component +from the message files. As a reminder: Under DOS and OS/2, the {\tt +COUNTRY} setting is queried, while under Unix, the environment variables +{\tt LC\_MESSAGES, LC\_ALL,} and {\tt LANG} are checked. + +\subsection{Format of the Source Files} + +A source file for the message compiler {\em rescomp} usually has the +suffix {\tt .res}. The message compiler generates one or two files from a +source: +\begin{itemize} +\item{a binary file which is read at runtime by AS resp. its tools} +\item{optionally one further C header file assigning an index number to + all messages. These index numbers in combination with an index + table in the binary file allow fast access to individual + messages at runtime.} +\end{itemize} + +The source file for the message compiler is a pure ASCII file and can +therefore be modified with any editor. It consists of a sequence of +control commands with parameters. Empty lines and lines beginning with a +semicolon are ignored. Inclusion of other files is possible via the {\tt +Include} statement: +\begin{verbatim} +Include +\end{verbatim} + +The first two statements in every source file must be two statements +describing the languages defined in the following. The more important one +is {\tt Langs}, e.g.: +\begin{verbatim} +Langs DE(049) EN(001,061) +\end{verbatim} +describes that two languages will be defined in the rest of the file. The +first one shall be used under Unix when the language has been set to {\tt +DE} via environment variable. Likewise, it shall be used under DOS and +OS/2 when the country code was set to 049. Similarly, the second set +shall be used for the settings {\tt EN} resp. 061 or 001. 
While multiple +'telephone numbers' may point to a single language, the assignment to a +Unix language code is a one-to-one correspondence. This is no problem in +practice since the {\tt LANG} variables Unix uses describe sub-variants via +suffixes, e.g.: +\begin{verbatim} +de.de +de.ch +en.us +\end{verbatim} +AS only compares the beginning of the strings and therefore still comes to +the right decision. +The {\tt Default} statement defines the language that shall be used if +either no language has been set at all or a language is used that is not +mentioned in the argument list of {\tt Langs}. This is typically the +English language: +\begin{verbatim} +Default EN +\end{verbatim} +These definitions are followed by an arbitrary number of {\tt Message} +statements, i.e. definitions of messages: +\begin{verbatim} +Message ErrName + ": Fehler " + ": error " +\end{verbatim} +In case {\em n} languages were announced via the {\tt Langs} statement, +the message compiler takes {\bf exactly} the following {\em n} strings as the +strings to be stored. It is therefore impossible to leave out certain +languages for individual messages, and an empty line following the strings +should in no way be misunderstood as an end marker for the list; inserted +lines between statements only serve purposes of better readability. 
It is +however allowed to split individual messages across multiple lines in the +source file; all lines except for the last one have to be ended with a +backslash as continuation character: +\begin{verbatim} +Message TestMessage2 + "Dies ist eine" \ + "zweizeilige Nachricht" + "This is a" \ + "two-line message" +\end{verbatim} +As already mentioned, source files are pure ASCII files. National special +characters may be placed in message texts (and the compiler will correctly +pass them to the resulting file); a big disadvantage however is that such +a file is not fully portable any more: in case it is ported to another +system using a different coding for national special characters, the user +will probably be confronted with strange characters at runtime... Special +characters should therefore always be written via special sequences +borrowed from HTML or SGML (see table \ref{TabSpecChars}). Linefeeds +can be inserted into a line via \verb!\n!, similar to C. +\begin{table*}[htb] +\begin{center}\begin{tabular}{|l|l|} +\hline +Sequence... & results in... \\ +\hline +\hline +\verb!&auml; &ouml; &uuml;! & \"a \"o \"u (umlauts)\\ +\verb!&Auml; &Ouml; &Uuml;! & \"A \"O \"U \\ +\verb!&szlig;! & \ss{} (sharp s) \\ +\verb!&agrave; &egrave; &igrave; &ograve; &ugrave;! & \`a \`e \`i \`o \`u (accent \\ +\verb!&Agrave; &Egrave; &Igrave; &Ograve; &Ugrave;! & \`A \`E \`I \`O \`U grave) \\ +\verb!&aacute; &eacute; &iacute; &oacute; &uacute;! & \'a \'e \'i \'o \'u (accent \\ +\verb!&Aacute; &Eacute; &Iacute; &Oacute; &Uacute;! & \'A \'E \'I \'O \'U aigu) \\ +\verb!&acirc; &ecirc; &icirc; &ocirc; &ucirc;! & \^a \^e \^i \^o \^u (accent \\ +\verb!&Acirc; &Ecirc; &Icirc; &Ocirc; &Ucirc;! & \^A \^E \^I \^O \^U circonflexe) \\ +\verb!&ccedil; &Ccedil;! & \c{c} \c{C} (cedilla) \\ +\verb!&ntilde; &Ntilde;! & \~n \~N \\ +\verb!&aring; &Aring;! & \aa \AA \\ +\verb!&aelig; &Aelig;! & \ae \AE \\ +\verb!&iquest; &iexcl;! & inverted ? or ! \\ +\hline +\end{tabular}\end{center} +\caption{Special character notation of {\em rescomp}\label{TabSpecChars}} +\end{table*} + +%%--------------------------------------------------------------------------- + +\section{Creation of Documentation} + +A source distribution of AS contains this documentation in LaTeX format +only. 
Other formats are created from this one automatically via tools +provided with AS. One reason is to reduce the size of the source +distribution, another reason is that changes in the documentation only +have to be made once, avoiding inconsistencies. + +LaTeX was chosen as the master format because...because...because it's +been there all the time before. Additionally, TeX is almost arbitrarily +portable and fits quite well to the demands of AS. A standard +distribution therefore allows a nice printout on about any printer; for a +conversion to an ASCII file that used to be part of earlier distributions, +the converter {\em tex2doc} is included, additionally the converter {\em +tex2html} which allows putting the manual on the Web. + +Generation of the documentation is started via a simple +\begin{verbatim} +make docs +\end{verbatim} +The two converters mentioned are built first, then applied to the TeX +documentation and finally, LaTeX itself is called. All this of course for +all languages... + +%%--------------------------------------------------------------------------- + +\section{Test Suite} + +Since AS deals with binary data of a precisely defined structure, it is +naturally sensitive to system and compiler dependencies. To reach at +least a minimum amount of certainty that everything went right during +compilation, a set of test sources is provided in the subdirectory {\tt +tests} that allows testing the freshly built assembler. These programs +are primarily trimmed to find faults in the translation of the machine +instruction set, which are commonplace when integer lengths vary. +Target-independent features like the macro processor or conditional +assembly are only casually tested, since I assume that they work +everywhere when they work for me... + +The test run is started via a simple {\em make test}. Each test program +is assembled, converted to a binary file, and compared to a reference +image. 
A test is considered to be passed if and only if the reference +image and the newly generated one are identical on a bit-by-bit basis. At +the end of the test, the assembly time for every test is printed (those +who want may extend the file BENCHES with these results), accompanied by +a success or failure message. Track down every error that occurs, even if +it occurs in a processor target you are never going to use! It is always +possible that this points to an error that may also come up for other +targets, but by coincidence not in the test cases. + +%%--------------------------------------------------------------------------- + +\section{Adding a New Target Processor} + +The probably most common reason to modify the source code of AS is to add +a new target processor. Apart from adding the new module to the +Makefile, there are only a few places in other modules that need a modification. +The new module will do the rest by registering itself in the list of code +generators. I will describe the needed steps in a cookbook style in the +following sections: + +\subsubsection{Choosing the Processor's Name} + +The name chosen for the new processor has to fulfill two criteria: +\begin{enumerate} +\item{The name must not be already in use by another processor. If one + starts AS without any parameters, a list of the names already in + use will be printed.} +\item{If the name shall appear completely in the symbol \tty{MOMCPU}, it may + not contain other letters than A..F (except right at the + beginning). The variable \tty{MOMCPUNAME} however will always report + the full name during assembly. Special characters are generally + disallowed, lowercase letters will be converted by the \tty{CPU} + command to uppercase letters and are therefore senseless in the + processor name.} +\end{enumerate} + +The first step for registration is making an entry for the new processor +(family) in the file {\tt headids.c}. 
As already mentioned, this file is +also used by the tools and specifies the code ID assigned to a processor +family, along with the default hex file format to be used. I would like +to have some coordination before choosing the ID... + +\subsubsection{Definition of the Code Generator Module} + +The name of the unit that shall be responsible for the new processor +should bear at least some similarity to the processor's name (just +for the sake of uniformity) and should be named in the style of +\tty{code....}. The head with include statements is best taken from +another existing code generator. + +Except for an initialization function that has to be called at the +beginning of the {\tt main()} function in module {\tt as.c}, the new +module neither has to export variables nor functions as the complete +communication is done at runtime via indirect calls. Registration is simply done +by a call to the function +\tty{AddCPU} for each processor type that shall be treated by this unit: +\begin{verbatim} + CPUxxxx:=AddCPU('XXXX',SwitchTo_xxxx); +\end{verbatim} +\tty{'XXXX'} is the name chosen for the processor which later must be used +in assembler programs to switch AS to this target processor. +\tty{SwitchTo\_xxxx} (abbreviated as the ''switcher'' in the following) is +a procedure without parameters that is called by AS when the switch to the +new processor actually takes place. \tty{AddCPU} delivers an integer +value as result that serves as an internal ''handle'' for the new +processor. The global variable \tty{MomCPU} always contains the handle of +the target processor that is currently set. The value returned by +\tty{AddCPU} should be stored in a private variable of type \tty{CPUVar} +(called \tty{CPUxxxx} in the example above). In case a code generator +module implements more than one processor (e.g. several processors of a +family), the module can find out which instruction subset is currently +allowed by comparing \tty{MomCPU} against the stored handles. 
+ +The switcher's task is to ''reorganize'' AS for the new target +processor. This is done by changing the values of several global +variables: +\begin{itemize} +\item{\tty{ValidSegs}: Not all processors have all address spaces defined + by AS. This set defines which subset the \tty{SEGMENT} instruction + will enable for the currently active target processor. At least the + \tty{CODE} segment has to be enabled. The complete set of allowed + segments can be looked up in the file \tty{fileformat.h} (\tty{Seg....} + constants).} +\item{\tty{SegInits}: This array stores the initial program counter values + for the individual segments (i.e. the values the program counters + will initially take when there is no \tty{ORG} statement). There are + only a few exceptions (like logically separated address spaces + that physically overlap) which justify other initial values than + 0.} +\item{\tty{Grans}: This array specifies the size of the smallest addressable + element in bytes for each segment, i.e. the size of an element + that increases an address by 1. Most processors need a value of + 1, even if they are 16- or 32-bit processors, but the PICs and + signal processors are cases where higher values are required.} +\item{\tty{ListGrans}: This array specifies the size of byte groups that shall + be shown in the assembly listing. For example, instruction words + of the 68000 are always 2 bytes long though the code segment's + granularity is 1. The \tty{ListGrans} entry therefore has to be set to + 2.} +\item{\tty{SegLimits}: This array stores the highest possible address for + each segment, e.g. 65535 for a 16-bit address space. 
This array + need not be filled in case the code generator takes over the + {\tt ChkPC} method.} +\item{\tty{ConstMode}: This variable may take the values + \tty{ConstModeIntel}, \tty{ConstModeMoto}, or \tty{ConstModeC} + and determines which syntax has to be used to specify the base of + integer constants.} +\item{\tty{PCSymbol}: This variable contains the string an assembler program + may use to get the current value of the program counter. + Intel processors for example usually use a dollar sign.} +\item{\tty{TurnWords}: If the target processor uses big-endian addressing and + one of the fields in \tty{ListGrans} is larger than one, set this flag + to true to get the correct byte order in the code output file.} +\item{\tty{SetIsOccupied}: Some processors have a \tty{SET} machine instruction. + AS will allow \tty{SET} instructions to pass through to the code + generator and instead use \tty{EVAL} if this flag is set.} +\item{\tty{HeaderID}: This variable contains the ID that is used to mark the + current processor family in the code output file (see the + description of the code format produced by AS). I urge you to + contact me before selecting the value to avoid ambiguities. + Values outside the range of \$01..\$7f should be avoided as they + are reserved for special purposes (like a future extension to + allow linkable code). Even though this value is still hard-coded + in most code generators, the preferred method is now to fetch this + value from {\tt headids.h} via {\tt FindFamilyByName}.} +\item{\tty{NOPCode}: There are some situations where AS has to fill unused + code areas with NOP statements. This variable contains the + machine code of the NOP statement.} +\item{\tty{DivideChars}: This string contains the characters that are valid + separation characters for instruction parameters. 
Only extreme + exotics like the DSP56 require something other than a single comma + in this string.} +\item{\tty{HasAttrs}: Some processors like the 68k series additionally split + an instruction into mnemonic and attribute. If the new processor + also does something like that, set this flag to true and AS will + deliver the instructions' components readily split in the string + variables \tty{OpPart} and \tty{AttrPart}. If this flag is however set to + false, no splitting will take place and the instruction will be + delivered as a single piece in \tty{OpPart}. \tty{AttrPart} will stay empty + in this case. One really should set this flag to false if the + target processor does not have attributes as one otherwise loses + the opportunity to use macros with a name containing dots (e.g. + to emulate other assemblers).} +\item{\tty{AttrChars}: In case \tty{HasAttrs} is true, this string has to contain + all characters that can separate mnemonic and attribute. In most + cases, this string only contains a single dot.} +\end{itemize} +Do not assume that any of these variables has a predefined value; set +them \bb{all}!! + +Apart from these variables, some function pointers have to be set that +form the link from AS to the ''active'' parts of the code +generator: +\begin{itemize} +\item{\tty{MakeCode}: This routine is called after a source line has been + split into mnemonic and parameters. The mnemonic is stored into + the variable \tty{OpPart}, and the parameters can be looked up in the + array \tty{ArgStr}. The number of arguments may be read from + \tty{ArgCnt}. + The binary code has to be stored into the array \tty{BAsmCode}, its + length into \tty{CodeLen}. In case the processor is word oriented + like the 68000 (i.e. the \tty{ListGrans} element corresponding to the + currently active segment is 2), the field may be addressed + wordwise via \tty{WAsmCode}. There is also \tty{DAsmCode} for extreme + cases... 
The code length has to be given in units corresponding + to the current segment's granularity.} +\item{\tty{SwitchFrom}: This parameter-less procedure enables the code generator + module to do ''cleanups'' when AS switches to another target processor. + This hook allows e.g. to free memory that has been allocated in the + generator and that is not needed as long as the generator is not + active. It may point to an empty procedure in the simplest case. + One example for the usage of this hook is the module \tty{CODE370} that + builds its instruction tables dynamically and frees them again after + usage.} +\item{\tty{IsDef}: Some processors know additional instructions that impose + a special meaning on a label in the first column like \tty{EQU} does. One + example is the \tty{BIT} instruction found in an 8051 environment. This + function has to return TRUE if such a special instruction is + present. In the simplest case (no such instructions), the routine + may return a constant FALSE.} +\end{itemize} + +Optionally, the code generator may additionally set the following function +pointers: +\begin{itemize} +\item{\tty{ChkPC} : Though AS internally treats all program counters as + either 32 or 64 bits, most processors use an address space that is + much smaller. This function informs AS whether the current program + counter has exceeded its allowed range. This routine may of course + be much more complicated in case the target processor has more than + one address space. One example is in module \tty{code16c8x.c}. In + case everything is fine, the function has to return TRUE, otherwise + FALSE. The code generator only has to implement this function if + it did not set up the array {\tt SegLimits}. This may e.g. become + necessary when the allowed range of addresses in a segment is + non-contiguous.} +\item{\tty{InternSymbol} : Some processors, e.g. 
those with a register + bank in their internal RAM, predefine such 'registers' as symbols, + and it wouldn't make much sense to define them in a separate include + file with 256 or maybe more {\tt EQU}s. This hook allows access to + the code generator of AS: It obtains an expression as an ASCII + string and sets up the passed structure of type {\em TempResult} + accordingly when one of these 'built-in' symbols is detected. The + element {\tt Typ} has to be set to {\tt TempNone} in case the check + failed. Error messages from this routine should be avoided as + unidentified names could signify ordinary symbols (the parser will + check this afterwards). Be extremely careful with this routine as + it allows you to intervene in the parser's heart!} +\end{itemize} + +By the way: People who want to become immortal may add a copyright +string. This is done by adding a call to the procedure \tty{AddCopyright} +in the module's initialization part (right next to the \tty{AddCPU} calls): +\begin{verbatim} + AddCopyright( + "Intel 80986 code generator (C) 2010 Jim Bonehead"); +\end{verbatim} +The string passed to \tty{AddCopyright} will be printed upon program start +in addition to the standard message. + +If needed, the unit may also use its initialization part to hook into +a list of procedures that are called prior to each pass of assembly. +Such a need for example arises when the module's code generation +depends on certain flags that can be modified via pseudo +instructions. An example is a processor that can operate in either +user or supervisor mode. In user mode, some instructions are +disabled. The flag that tells AS whether the following code executes +in user or supervisor mode might be set via a special pseudo +instruction. But there must also be an initialization that assures +that all passes start with the same state. The hook offered via +\tty{InitPassProc} offers a chance to do such initializations. 
The +principle is similar to the redirection of an interrupt vector: the +unit saves the old value prior to pointing the procedure variable to +its own routine (the routine must be parameter-less and \tty{FAR} coded). +The new routine first calls the old chain of procedures and +afterwards does its own operations. + +The function chain built up via \tty{CleanUpProc} works similarly to +\tty{InitPassProc}: It enables code generators to do clean-ups after +assembly (e.g. freeing of literal tables). This makes sense when +multiple files are assembled with a single call of AS. Otherwise, one +would risk having 'junk' in tables from the previous run. No module +currently exploits this feature. + +\subsubsection{Writing the Code Generator itself} + +Now we have finally reached the point where your creativity is challenged: +It is up to you how you manage to translate mnemonic and parameters +into a sequence of machine code. The symbol tables are of course +accessible (via the formula parser) just like everything exported +from \tty{ASMSUB}. Some general rules (take them as advice and not as +laws...): +\begin{itemize} +\item{Try to split the instruction set into groups of instructions that + have the same operand syntax and that differ only in a few bits + of their machine code. For example, one can do all instructions + without parameters in a single table this way.} +\item{Most processors have a fixed spectrum of addressing modes. Place + the parsing of an address expression in a separate routine so you + can reuse the code.} +\item{The subroutine \tty{WrError} defines a lot of possible error codes and + can be easily extended. Use this! It is no good to simply issue + a ''syntax error'' on all error conditions!} +\end{itemize} +Studying other existing code generators should also prove to be +helpful. 
+ +\subsubsection{Modifications of Tools} + +A microscopic change to the tools' sources is still necessary, namely to +the routine {\tt Granularity()} in {\tt toolutils.c}: in case one of the +processor's address spaces has a granularity different from 1, the switch +statement in this place has to be adapted accordingly, otherwise PLIST, +P2BIN, and P2HEX start counting wrong... + +\section{Localization to a New Language} + +You are interested in this topic? Wonderful! This is an issue that is +often neglected by other programmers, especially when they come from the +country on the other side of the big lake... + +The localization to a new language can be split into two parts: the +adaptation of program messages and the translation of the manual. The +latter one is definitely a work of gigantic size, however, the adaptation of +program messages should be a work doable on two or three weekends, given +that one knows both the new and one of the already present languages. +Unfortunately, this translation cannot be done on a step-by-step basis +because the resource compiler currently cannot deal with a variable number +of languages for different messages, so the slogan is 'all or nothing'. + +The first operation is to add the new language to {\tt header.res}. The +two-letter-abbreviation used for this language is best fetched from the +nearest Unix system (in case you don't work on one anyway...), the +international telephone prefix from a DOS manual. + +When this is complete, one can rebuild all necessary parts with a simple +{\em make} and obtains an assembler that supports one more language. Do +not forget to forward the results to me. 
This way, all users will benefit +from this with the next release :-) + +%%=========================================================================== + +\cleardoublepage +\begin{thebibliography}{99} + +\bibitem{Williams} Steve Williams: \\ + {\em 68030 Assembly Language Reference.\/} \\ + Addison-Wesley, Reading, Massachusetts, 1989 + +\bibitem{AMD29K} Advanced Micro Devices: \\ + {\em AM29240, AM29245, and AM29243 RISC + Microcontrollers.\/} \\ + 1993 + +\bibitem{AtAVR} Atmel Corp.: \\ + {\em AVR Enhanced RISC Microcontroller Data Book.\/} \\ + May 1996 + +\bibitem{AVRObj} Atmel Corp.: \\ + {\em 8-Bit AVR Assembler and Simulator Object File + Formats (Preliminary).\/} \\ + (part of the AVR tools documentation) + +\bibitem{CMD816} CMD Microcircuits: \\ + {\em G65SC802/G65SC816 CMOS 8/16-Bit Microprocessor.\/} \\ + Family Data Sheet. + +\bibitem{CPM68K} Digital Research: \\ + {\em CP/M 68K Operating System User's Guide.\/} \\ + 1983 + +\bibitem{Cyrix} Cyrix Corp.: \\ + {\em FasMath 83D87 User's Manual.\/} \\ + 1990 + +\bibitem{Dallas320} Dallas Semiconductor: \\ + {\em DS80C320 High-Speed Micro User's Guide.\/} \\ + Version 1.30, 1/94 + +\bibitem{Fair1101} Fairchild Semiconductor: \\ + {\em ACE1101 Data Sheet.\/} \\ + Preliminary, May 1999 + +\bibitem{Fair1202} Fairchild Semiconductor: \\ + {\em ACE1202 Data Sheet.\/} \\ + Preliminary, May 1999 + +\bibitem{Fair8004} Fairchild Semiconductor: \\ + {\em ACEx Guide to Developer Tools.\/} \\ + AN-8004, Version 1.3, September 1998 + +\bibitem{FujitsuCD} Fujitsu Limited: \\ + {\em June 1998 Semiconductor Data Book.\/} \\ + CD00-00981-1E + +\bibitem{Hit180} Hitachi Ltd.: \\ + {\em 8-/16-Bit Microprocessor Data Book.\/} \\ + 1986 + +\bibitem{Hit63} Trevor J.Terrel \& Robert J. 
Simpson: \\ + {\em Understanding HD6301X/03X CMOS Microprocessor + Systems.\/} \\ + published by Hitachi + +\bibitem{HitH8_3} Hitachi Microcomputer: \\ + {\em H8/300H Series Programming Manual.\/} \\ + (21-032, no year of release given) + +\bibitem{SH7000} Hitachi Semiconductor Design \& Development Center: \\ + {\em SH Microcomputer Hardware Manual + (Preliminary).\/} + +\bibitem{SH7700} Hitachi Semiconductor and IC Div.: \\ + {\em SH7700 Series Programming Manual.\/} \\ + 1st Edition, September 1995 + +\bibitem{HitH8_5} Hitachi Semiconductor and IC Div.: \\ + {\em H8/500 Series Programming Manual.\/} \\ + (21-20, 1st Edition Feb. 1989) + +\bibitem{HitH8_532} Hitachi Ltd.: \\ + {\em H8/532 Hardware Manual.\/} \\ + (21-30, no year of release given) + +\bibitem{HitH8_534} Hitachi Ltd.: \\ + {\em H8/534,H8/536 Hardware Manual.\/} \\ + (21-19A, no year of release given) + +\bibitem{PPC403} IBM Corp.: \\ + {\em PPC403GA Embedded Controller User's Manual.\/} \\ + First Edition, September 1994 + +\bibitem{IntEmb} Intel Corp.: \\ + {\em Embedded Controller Handbook.\/} \\ + 1987 + +\bibitem{IntMic} Intel Corp.: \\ + {\em Microprocessor and Peripheral Handbook, Volume I + Microprocessor.\/} \\ + 1988 + +\bibitem{Int960} Intel Corp. : \\ + {\em 80960SA/SB Reference Manual.\/} \\ + 1991 + +\bibitem{Int196} Intel Corp.: \\ + {\em 8XC196NT Microcontroller User's Manual.\/} \\ + June 1995 + +\bibitem{Int251} Intel Corp.: \\ + {\em 8XC251SB High Performance CHMOS Single-Chip + Microcontroller.\/} \\ + Sept. 1995, Order Number 272616-003 + +\bibitem{Int296} Intel Corp.: \\ + {\em 80296SA Microcontroller User's Manual.\/} \\ + Sept. 
1996 + +\bibitem{Kaku} Hirotsugu Kakugawa: \\ + {\em A memo on the secret features of 6309.\/} \\ + (available via World Wide Web: \\ + http://www.cs.umd.edu/users/fms/comp/CPUs/6309.txt) + +\bibitem{MicroChip} Microchip Technology Inc.: \\ + {\em Microchip Data Book.\/} \\ + 1993 Edition + +\bibitem{Mit41} Mitsubishi Electric: \\ + {\em Single-Chip 8-Bit Microcomputers.\/} \\ + Vol.2, 1987 + +\bibitem{Mit16} Mitsubishi Electric: \\ + {\em Single-Chip 16-Bit Microcomputers.\/} \\ + Enlarged edition, 1991 + +\bibitem{Mit8} Mitsubishi Electric: \\ + {\em Single-Chip 8 Bit Microcomputers.\/} \\ + Vol.2, 1992 + +\bibitem{Mit4500} Mitsubishi Electric: \\ + {\em M34550Mx-XXXFP User's Manual.\/} \\ + Jan. 1994 + +\bibitem{MitM16} Mitsubishi Electric: \\ + {\em M16 Family Software Manual.\/} \\ + First Edition, Sept. 1994 + +\bibitem{MitM16C} Mitsubishi Electric: \\ + {\em M16C Software Manual.\/} \\ + First Edition, Rev. C, 1996 + +\bibitem{Mit30600} Mitsubishi Electric: \\ + {\em M30600-XXXFP Data Sheet.\/} \\ + First Edition, April 1996 + +\bibitem{GreenM16} documentation about the M16/M32-developer's package + from Green Hills Software + +\bibitem{MotMic} Motorola Inc.: \\ + {\em Microprocessor, Microcontroller and Peripheral + Data.\/} \\ + Vol. I+II, 1988 + +\bibitem{Mot81} Motorola Inc.: \\ + {\em MC68881/882 Floating Point Coprocessor User's + Manual.\/} \\ + Second Edition, Prentice-Hall, Englewood Cliffs 1989 + +\bibitem{Mot51} Motorola Inc.: \\ + {\em MC68851 Paged Memory Management Unit User's + Manual.\/} \\ + Second Edition, Prentice-Hall, Englewood Cliffs 1989,1988 + +\bibitem{Mot32} Motorola Inc.: \\ + {\em CPU32 Reference Manual.\/} \\ + Rev. 1, 1990 + +\bibitem{Mot56} Motorola Inc.: \\ + {\em DSP56000/DSP56001 Digital Signal Processor User's + Manual.\/} \\ + Rev. 2, 1990 + +\bibitem{Mot340} Motorola Inc.: \\ + {\em MC68340 Technical Summary.\/} \\ + Rev. 2, 1991 + +\bibitem{Mot16} Motorola Inc.: \\ + {\em CPU16 Reference Manual.\/} \\ + Rev.
1, 1991 + +\bibitem{Mot68K} Motorola Inc.: \\ + {\em Motorola M68000 Family Programmer's + Reference Manual.\/} \\ + 1992 + +\bibitem{Mot332} Motorola Inc.: \\ + {\em MC68332 Technical Summary.\/} \\ + Rev. 2, 1993 + +\bibitem{Mot601} Motorola Inc.: \\ + {\em PowerPC 601 RISC Microprocessor User's Manual.\/} \\ + 1993 + +\bibitem{Mot505} Motorola Inc.: \\ + {\em PowerPC(tm) MPC505 RISC Microcontroller Technical + Summary.\/} \\ + 1994 + +\bibitem{Mot12} Motorola Inc.: \\ + {\em CPU12 Reference Manual.\/} \\ + 1st edition, 1996 + +\bibitem{Mot08} Motorola Inc.: \\ + {\em CPU08 Reference Manual.\/} \\ + Rev. 1 (no year of release given in PDF file) + +\bibitem{Mot360} Motorola Inc.: \\ + {\em MC68360 User's Manual.\/} + +\bibitem{MotCold} Motorola Inc.: \\ + {\em MCF 5200 ColdFire Family Programmer's Reference + Manual.\/} \\ + 1995 + +\bibitem{MotMCore} Motorola Inc.: \\ + {\em M*Core Programmer's Reference Manual.\/} \\ + 1997 + +\bibitem{Mot56300} Motorola Inc.: \\ + {\em DSP56300 24-Bit Digital Signal Processor + Family Manual.\/} \\ + Rev. 0 (no year of release given in PDF file) + +\bibitem{SCMP} National Semiconductor: \\ + {\em SC/MP Program\-mier- und + As\-sem\-bler-Hand\-buch.\/} \\ + Publication Number 4200094A, Aug.
1976 + +\bibitem{AsmCop} National Semiconductor: \\ + {\em COP800 Assembler/Linker/Librarian User's + Manual.\/} \\ + Customer Order Number COP8-ASMLNK-MAN \\ + NSC Publication Number 424421632-001B \\ + August 1993 + +\bibitem{Cop87L84} National Semiconductor: \\ + {\em COP87L84BC microCMOS One-Time-Programmable (OTP) + Microcontroller.\/} \\ + Preliminary, March 1996 + +\bibitem{Nat14xxx} National Semiconductor: \\ + {\em SC14xxx DIP commands Reference guide.\/} \\ + Application Note AN-D-031, Version 0.4, 12-28-1998 + +\bibitem{NECV} NEC Corp.: \\ + {\em $\mu$PD70108/$\mu$PD70116/$\mu$PD70208/$\mu$PD70216/$\mu$PD72091 + Data Book.\/} \\ + (no year of release given) + +\bibitem{NEC78K0} NEC Electronics Europe GmbH: \\ + {\em User's Manual $\mu$COM-87 AD Family.\/} \\ + (no year of release given) + +\bibitem{NEC75} NEC Corp.: \\ + {\em $\mu$COM-75x Family 4-bit CMOS Microcomputer User's + Manual.\/} \\ + Vol. I+II (no year of release given) + +\bibitem{NECSig} NEC Corp.: \\ + {\em Digital Signal Processor Product Description.\/} \\ + PDDSP.....067V20 (no year of release given) + +\bibitem{NEC78} NEC Corp.: \\ + {\em $\mu$PD78070A, 78070AY 8-Bit Single-Chip Microcontroller + User's Manual.\/} \\ + Document No.
U10200EJ1V0UM00 (1st edition), August 1995 + +\bibitem{NEC7814} NEC Corp.: \\ + {\em Data Sheet $\mu$PD78014.\/} + +\bibitem{PhilXA} Philips Semiconductor: \\ + {\em 16-bit 80C51XA Microcontrollers (eXtended + Architecture).\/} \\ + Data Handbook IC25, 1996 + +\bibitem{SGS04} SGS-Thomson Microelectronics: \\ + {\em 8 Bit MCU Families EF6801/04/05 Databook.\/}\\ + 1st edition, 1989 + +\bibitem{SGS62} SGS-Thomson Microelectronics: \\ + {\em ST6210/ST6215/ST6220/ST6225 Databook.\/} \\ + 1st edition, 1991 + +\bibitem{ST7Man} SGS-Thomson Microelectronics: \\ + {\em ST7 Family Programming Manual.\/} \\ + June 1995 + +\bibitem{SGS9} SGS-Thomson Microelectronics: \\ + {\em ST9 Programming Manual.\/} \\ + 3rd edition, 1993 + +\bibitem{Siem166} Siemens AG: \\ + {\em SAB80C166/83C166 User's Manual.\/} \\ + Edition 6.90 + +\bibitem{Siem167} Siemens AG: \\ + {\em SAB C167 Preliminary User's Manual.\/} \\ + Revision 1.0, July 1992 + +\bibitem{Siem502} Siemens AG: \\ + {\em SAB-C502 8-Bit Single-Chip Microcontroller User's + Manual.\/} \\ + Edition 8.94 + +\bibitem{Siem501} Siemens AG: \\ + {\em SAB-C501 8-Bit Single-Chip Microcontroller User's + Manual.\/} \\ + Edition 2.96 + +\bibitem{Siem504} Siemens AG: \\ + {\em C504 8-Bit CMOS Microcontroller User's Manual.\/} \\ + Edition 5.96 + +\bibitem{Syb68K} C.Vieillefond: \\ + {\em Programmierung des 68000.\/} \\ + Sybex-Verlag D\"usseldorf, 1985 + +\bibitem{Sym8xx} Symbios Logic Inc: \\ + {\em Symbios Logic PCI-SCSI-I/O Processors Programming + Guide.\/} \\ + Version 2.0, 1995/96 + +\bibitem{Ti990} Texas Instruments: \\ + {\em Model 990 Computer/TMS9900 Microprocessor + Assembly Language Programmer's Guide.\/} \\ + 1977, Manual No. 
943441-9701 + +\bibitem{Ti9900} Texas Instruments: \\ + {\em TMS9995 16-Bit Microcomputer.\/} \\ + Preliminary Data Manual, + 1981 + +\bibitem{TiC10} Texas Instruments: \\ + {\em First-Generation TMS320 User's Guide.\/} \\ + 1988, ISBN 2-86886-024-9 + +\bibitem{Ti7000} Texas Instruments: \\ + {\em TMS7000 family Data Manual.\/} \\ + 1991, DB103 + +\bibitem{TiC30} Texas Instruments: \\ + {\em TMS320C3x User's Guide.\/} \\ + Revision E, 1991 + +\bibitem{TiC20} Texas Instruments: \\ + {\em TMS320C2x User's Guide.\/} \\ + Revision C, Jan. 1993 + +\bibitem{Ti370} Texas Instruments: \\ + {\em TMS370 Family Data Manual.\/} \\ + 1994, SPNS014B + +\bibitem{Ti430FamSoft} Texas Instruments: \\ + {\em MSP430 Family Software User's Guide.\/} \\ + 1994, SLAUE11 + +\bibitem{Ti430Met} Texas Instruments: \\ + {\em MSP430 Metering Application.\/} \\ + 1996, SLAAE10A + +\bibitem{Ti430FamArch} Texas Instruments: \\ + {\em MSP430 Family Architecture User's Guide.\/} \\ + 1995, SLAUE10A + +\bibitem{TiC60} Texas Instruments: \\ + {\em TMS320C62xx CPU and Instruction Set Reference + Manual.\/} \\ + Jan. 1997, SPRU189A + +\bibitem{TiC20x} Texas Instruments: \\ + {\em TMS320C20x User's Guide.\/} \\ + April 1999, SPRU127C + +\bibitem{Tosh90} Toshiba Corp.: \\ + {\em 8-Bit Microcontroller TLCS-90 Development System + Manual.\/} \\ + 1990 + +\bibitem{Tosh870} Toshiba Corp.: \\ + {\em 8-Bit Microcontroller TLCS-870 Series Data + Book.\/} \\ + 1992 + +\bibitem{Tosh900} Toshiba Corp.: \\ + {\em 16-Bit Microcontroller TLCS-900 Series Users + Manual.\/} \\ + 1992 + +\bibitem{Tosh900L} Toshiba Corp.: \\ + {\em 16-Bit Microcontroller TLCS-900 Series Data Book: + TMP93CM40F/ TMP93CM41F.\/} \\ + 1993 + +\bibitem{Tosh47} Toshiba Corp.: \\ + {\em 4-Bit Microcontroller TLCS-47E/47/470/470A Development + System Manual.\/} \\ + 1993 + +\bibitem{Tosh9000} Toshiba Corp.: \\ + {\em TLCS-9000/16 Instruction Set Manual Version + 2.2.\/} \\ + 10.
Feb 1994 + +\bibitem{Val8X} Valvo GmbH: \\ + {\em Bipolare Mikroprozessoren und bipolare + LSI-Schaltungen.\/} \\ + Datenbuch, 1985, ISBN 3-87095-186-9 + +\bibitem{Zilog} data sheets from Zilog about the Z80 family + +\bibitem{ZilZ8} Zilog Inc.: \\ + {\em Z8 Microcontrollers Databook.\/} \\ + 1992 + +\bibitem{ZilZ8_2} Zilog Inc.: \\ + {\em Discrete Z8 Microcontrollers Databook.\/} \\ + (no year of release given) + +\bibitem{ZilZ380} Zilog Inc.: \\ + {\em Z380 CPU Central Processing Unit User's + Manual.\/} \\ + (no year of release given) + +\end{thebibliography} + +\cleardoublepage + +\printindex + +\end{document} -- cgit v1.2.3